From 0c9f3211521e06c256739524f8ec191032c7bf75 Mon Sep 17 00:00:00 2001 From: TAW Date: Fri, 12 Aug 2022 20:51:35 +0000 Subject: [PATCH 001/102] Compile with C++17 Compile with C++17 Note: Invoking with: _HAS_STD_BYTE=0;_HAS_AUTO_PTR_ETC=1 _HAS_STD_BYTE=0 works around 'byte' being defined in the RPC headers as well as in std. _HAS_AUTO_PTR_ETC=1 turns back on a few language elements that were removed in C++17: random_shuffle() mem_fun() auto_ptr() Also removes our one use of dynamic exception specifications, a feature removed in C++17. [Substrate:b9aa2bf313b1fdfc4132e86d29bf7c8b5bab8f86] --- CMakeLists.txt | 15 ++++++++ dev/ese/src/ese/bf.cxx | 2 +- dev/ese/src/ese/dataserializer.cxx | 2 +- .../collectionunit/redblacktree.cxx | 38 +++++++++++++++---- 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21799e5a..616d2a0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,13 @@ else() message(FATAL_ERROR "NOTE: NOT setting 64-bit OR 32-bit symbols") endif() +# Set the C++ standard to C++17 +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) # Optional: requests the ISO standard rather than compiler-specific extensions (e.g. -std=c++17 instead of -std=gnu++17) +# Alternatively, there is an MSVC-specific way to do it (it would need to go inside an if MSVC clause of some kind): +# add_compile_options(/std:c++17) + # CMake chooses reasonable prefixes and suffixes for Unix-like platforms, # but we'd rather keep them the same for now. May revisit this decision later. set(CMAKE_STATIC_LIBRARY_PREFIX) @@ -142,6 +149,14 @@ link_libraries( if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # Using Microsoft Visual C++ + + # The _HAS_STD_BYTE=0 came in along with the CMAKE_CXX_STANDARD 17 (and the matching stdcpp17 change + # from the .vcxproj project file world) to make ESE code not conflict. Research suggested this is only needed for MSVC, so it is + # set in here. 
+ add_compile_definitions( + _HAS_STD_BYTE=0 + ) + add_compile_options( /Oi # generate intrinsic functions /Ot diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index 0d585889..ee9d3329 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -14533,7 +14533,7 @@ void BFIMaintCacheStatsITask( VOID *, VOID * pvContext ) // we've failed to acquire the semaphore to schedule the task. this means the task must be executing // right now or in the process of rescheduling itself to complete its job. - S_ASSERT( dtickMaintCacheSizeRequest <= ( dtickMaintCacheStatsPeriod / 2 ) ); + static_assert( dtickMaintCacheSizeRequest <= ( dtickMaintCacheStatsPeriod / 2 ) ); g_cMaintCacheSizeReqAcquireFailures++; diff --git a/dev/ese/src/ese/dataserializer.cxx b/dev/ese/src/ese/dataserializer.cxx index cf51a7d4..c4dcbf53 100644 --- a/dev/ese/src/ese/dataserializer.cxx +++ b/dev/ese/src/ese/dataserializer.cxx @@ -688,7 +688,7 @@ DataSerializer::~DataSerializer() void DataSerializer::SetBindingsToDefault() { - for_each( m_bindings.begin(), m_bindings.end(), mem_fun( &DataBinding::SetToDefault ) ); + for_each( m_bindings.begin(), m_bindings.end(), mem_fn( &DataBinding::SetToDefault ) ); } ERR DataSerializer::ErrSaveBindings( IDataStore * const pstore ) diff --git a/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx b/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx index 5ee82ae5..7f34e156 100644 --- a/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx +++ b/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx @@ -5,6 +5,30 @@ using namespace std; #include "collectionunittest.hxx" +// Borrowed from an implementation of random_shuffle(), used here +// to make shuffle() behave like random_shuffle. 
+struct _Rand_urng_from_func +{ // wrap rand() as a URNG + typedef unsigned int result_type; + + static result_type (min)() + { // return minimum possible generated value + return (0); + } + + static result_type (max)() + { // return maximum possible generated value + return (RAND_MAX); + } + + result_type operator()() + { // invoke rand() + return (_CSTD rand()); + } +}; + +_Rand_urng_from_func _ShuffleRandFunc; + // node constructor zeroes members and sets the color to red CUnitTest( RedBlackTreeINodeConstructor, 0, "" ); ERR RedBlackTreeINodeConstructor::ErrTest() @@ -528,12 +552,12 @@ ERR RedBlackTreeRandomInserts::ErrTest() CRedBlackTree tree; - random_shuffle(keys, keys+_countof(keys)); + shuffle(keys, keys+_countof(keys), _ShuffleRandFunc); TestCall(ErrInsertAll(tree, keys, _countof(keys))); TestCall(ErrRetrieveAll(tree, keys, _countof(keys))); - random_shuffle(keys, keys+_countof(keys)); + shuffle(keys, keys+_countof(keys), _ShuffleRandFunc); TestCall(ErrDeleteAll(tree, keys, _countof(keys))); HandleError: @@ -556,7 +580,7 @@ ERR RedBlackTreeInvasiveMakeEmpty::ErrTest() CRedBlackTree::BaseType itree; - random_shuffle( keys, keys + _countof( keys ) ); + shuffle( keys, keys + _countof( keys ), _ShuffleRandFunc ); TestCall( ErrInsertAll( itree, keys, _countof( keys ), &prgNodes ) ); TestCall( ErrRetrieveAll( itree, keys, _countof( keys ) ) ); itree.MakeEmpty(); @@ -594,7 +618,7 @@ ERR RedBlackTreeMakeEmpty::ErrTest() CRedBlackTree tree; - random_shuffle(keys, keys+_countof(keys)); + shuffle(keys, keys+_countof(keys), _ShuffleRandFunc); TestCall(ErrInsertAll(tree, keys, _countof(keys))); TestCall(ErrRetrieveAll(tree, keys, _countof(keys))); tree.MakeEmpty(); @@ -621,11 +645,11 @@ ERR RedBlackTreeInsertDeleteInsert::ErrTest() CRedBlackTree tree; - random_shuffle(keys1, keys1+_countof(keys1)); - random_shuffle(keys2, keys2+_countof(keys2)); + shuffle(keys1, keys1+_countof(keys1), _ShuffleRandFunc); + shuffle(keys2, keys2+_countof(keys2), _ShuffleRandFunc); 
TestCall(ErrInsertAll(tree, keys1, _countof(keys1))); TestCall(ErrRetrieveAll(tree, keys1, _countof(keys1))); - random_shuffle(keys1, keys1+_countof(keys1)); + shuffle(keys1, keys1+_countof(keys1), _ShuffleRandFunc); TestCall(ErrDeleteAll(tree, keys1, _countof(keys1)/2)); TestCall(ErrInsertAll(tree, keys2, _countof(keys2))); TestCall(ErrRetrieveAll(tree, keys2, _countof(keys2))); From 999a59dab7a090199d67cb8f12dd84d21e057ba5 Mon Sep 17 00:00:00 2001 From: TAW Date: Mon, 15 Aug 2022 21:50:22 +0000 Subject: [PATCH 002/102] Prepare to replace C_ASSERT with static_assert Some compilers have problems with our C_ASSERT macro. This check-in creates a GCR style tool to bulk replace, but does not yet bulk replace. It also tweaks some .pl files that won't be caught by the bulk replace tool. The next checkin will be the bulk replace. There will be one more after that that does a few things by hand-editing (like removing our macro definition). [Substrate:29cb5ff81f2f1a4b02eb95b516f49f17ab465940] --- dev/ese/src/_perfctrs/perfdata.pl | 2 +- dev/ese/src/ese/genapitable.pl | 2 +- dev/ese/src/noncore/interop/grbits.h | 2 +- dev/ese/src/noncore/interop/move.h | 2 +- .../src/blue/src/esetest/esetest/bounce/generate_ese_stubs.pl | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/ese/src/_perfctrs/perfdata.pl b/dev/ese/src/_perfctrs/perfdata.pl index af37e74d..e117e6a7 100644 --- a/dev/ese/src/_perfctrs/perfdata.pl +++ b/dev/ese/src/_perfctrs/perfdata.pl @@ -639,7 +639,7 @@ sub PopulatePerfDataTemplate }; // These two objects should be the same. 
-C_ASSERT( sizeof( PerfDataTemplateReadOnly ) == sizeof( PerfDataTemplateReadWrite ) ); +static_assert( sizeof( PerfDataTemplateReadOnly ) == sizeof( PerfDataTemplateReadWrite ) ); EOF9 diff --git a/dev/ese/src/ese/genapitable.pl b/dev/ese/src/ese/genapitable.pl index 486ca9b8..412bc4ee 100644 --- a/dev/ese/src/ese/genapitable.pl +++ b/dev/ese/src/ese/genapitable.pl @@ -63,7 +63,7 @@ print OUTPUTFILE < Date: Tue, 16 Aug 2022 17:39:45 +0000 Subject: [PATCH 003/102] Added simulation of subsampling of cache traces in resmgrreplay Added the possibility to subsample a complete trace in resmgrreplay according to the method presented in Waldspurger, C. A., Park, N., Garthwaite, A., & Ahmad, I. (2015). Efficient MRC construction with SHARDS. In FAST 15. Subsampling the cache trace according to this method should make it possible to simulate the impact of different cache sizes or checkpoint depths from a much smaller trace without losing much accuracy. This change in resmgrreplay helps to measure how much accuracy can be lost due to subsampling. 
[Substrate:8a9ee7ee7971f6ad705449d3e8defef80cd1dfb4] --- .../resmgr/resmgremulator/rmemulator.cxx | 71 +++++- .../resmgr/resmgremulator/rmemulator.hxx | 10 + .../resmgr/resmgremulatorunit/basic.cxx | 211 ++++++++++++++++++ .../resmgr/resmgremulatorunit/negative.cxx | 27 +++ .../devlibtest/resmgr/resmgrreplay/main.cxx | 27 +++ 5 files changed, 345 insertions(+), 1 deletion(-) diff --git a/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.cxx b/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.cxx index fcc650ec..fc41c15e 100644 --- a/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.cxx +++ b/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.cxx @@ -154,7 +154,9 @@ PageEvictionEmulator::PageEvictionEmulator() : m_pbfftlContext( NULL ), m_pipeaImplementation( NULL ), m_cbPage( 0 ), - m_cpgChunk( 0 ) + m_cpgChunk( 0 ), + m_dwSamplingRatio( 1 ), + m_dwSamplingSeed( 0 ) { m_arrayDirtyPageOps.SetEntryDefault( NULL ); ResetConfig(); @@ -546,6 +548,21 @@ ERR PageEvictionEmulator::ErrSetLifetimeHistoRes( const TICK dtickLifetimeHistoR return JET_errSuccess; } +ERR PageEvictionEmulator::ErrSetSamplingParameters( const DWORD dwSamplingRatio, + const DWORD dwSamplingSeed ) +{ + Enforce( m_state == PageEvictionEmulator::peesUninitialized ); + if ( ( dwSamplingRatio < 1 ) ) + { + return JET_errInvalidParameter; + } + + m_dwSamplingRatio = dwSamplingRatio; + m_dwSamplingSeed = dwSamplingSeed; + + return JET_errSuccess; +} + ERR PageEvictionEmulator::ErrExecute() { ERR err = JET_errSuccess; @@ -1739,6 +1756,18 @@ void PageEvictionEmulator::DumpHistogram_( CPerfectHistogramStats& histogram, Enforce( errStats == CStats::ERR::errSuccess ); } + +inline bool PageEvictionEmulator::FSamplePage( const IFMPPGNO& ifmppgno ) +{ + if ( m_dwSamplingRatio == 1 ) + { + return true; + } + // Subsampling the set of pages on which to replay the trace. + // Based on Waldspurger, C. A., Park, N., Garthwaite, A., & SOMEONE, I. (2015). Efficient MRC construction with SHARDS. 
In FAST 15 + return ( ( ifmppgno.Hash() + m_dwSamplingSeed ) % m_dwSamplingRatio ) == 0; +} + void PageEvictionEmulator::TouchPage_( PAGEENTRY* const ppge, const BFTRACE::BFTouch_& bftouch ) { Enforce( ppge->ppage != NULL ); // Page must be cached from our end. @@ -1950,6 +1979,12 @@ ERR PageEvictionEmulator::ErrProcessTraceCache_( BFTRACE& bftrace ) BFTRACE::BFCache_ bfcache = bftrace.bfcache; const IFMPPGNO ifmppgno( bfcache.ifmp, bfcache.pgno ); + + if ( !FSamplePage( ifmppgno ) ) + { + goto HandleError; + } + PAGEENTRY* const ppge = PpgeGetEntry_( ifmppgno ); PageEvictionEmulator::STATS& stats = m_stats[ ifmppgno.ifmp ]; @@ -2097,6 +2132,12 @@ ERR PageEvictionEmulator::ErrProcessTraceTouch_( BFTRACE& bftrace ) BFTRACE::BFTouch_ bftouch = bftrace.bftouch; const IFMPPGNO ifmppgno( bftouch.ifmp, bftouch.pgno ); + + if ( !FSamplePage( ifmppgno ) ) + { + goto HandleError; + } + PAGEENTRY* const ppge = PpgeGetEntry_( ifmppgno ); PageEvictionEmulator::STATS& stats = m_stats[ ifmppgno.ifmp ]; @@ -2229,6 +2270,12 @@ ERR PageEvictionEmulator::ErrProcessTraceSuperCold_( const BFTRACE& bftrace ) const BFTRACE::BFSuperCold_& bfsupercold = bftrace.bfsupercold; const IFMPPGNO ifmppgno( bfsupercold.ifmp, bfsupercold.pgno ); + + if ( !FSamplePage( ifmppgno ) ) + { + goto HandleError; + } + PAGEENTRY* const ppge = PpgeGetEntry_( ifmppgno ); m_stats[ ifmppgno.ifmp ].cSuperColdedReal++; @@ -2262,6 +2309,12 @@ ERR PageEvictionEmulator::ErrProcessTraceEvict_( BFTRACE& bftrace ) } const IFMPPGNO ifmppgno( bfevict.ifmp, bfevict.pgno ); + + if ( !FSamplePage( ifmppgno ) ) + { + goto HandleError; + } + PAGEENTRY* const ppgeOriginal = PpgeGetEntry_( ifmppgno ); m_stats[ ifmppgno.ifmp ].cEvictionsReal++; @@ -2373,6 +2426,11 @@ ERR PageEvictionEmulator::ErrProcessTraceDirty_( const BFTRACE& bftrace ) LGPOS lgposModify( bftrace.bfdirty.lgenModify, bftrace.bfdirty.isecModify, bftrace.bfdirty.ibModify ); const IFMPPGNO ifmppgno( bftrace.bfdirty.ifmp, bftrace.bfdirty.pgno ); + if ( 
!FSamplePage( ifmppgno ) ) + { + return JET_errSuccess; + } + // We had a tracing bug in the engine, in which the lgposModify of the dirty operation // would be logged as the current lgposModify of the page, prior to it being stamped // with the new lgposModify associated to that particular dirty operation. Assume the new @@ -2409,6 +2467,12 @@ ERR PageEvictionEmulator::ErrProcessTraceWrite_( const BFTRACE& bftrace ) const BFTRACE::BFWrite_& bfwrite = bftrace.bfwrite; bool fReplayTrace = false; const IFMPPGNO ifmppgno( bfwrite.ifmp, bfwrite.pgno ); + + if ( !FSamplePage( ifmppgno ) ) + { + return JET_errSuccess; + } + PAGEENTRY* const ppge = PpgeGetEntry_( ifmppgno ); m_stats[ ifmppgno.ifmp ].cWritesReal++; @@ -2461,6 +2525,11 @@ ERR PageEvictionEmulator::ErrProcessTraceSetLgposModify_( const BFTRACE& bftrace LGPOS lgposModify( bftrace.bfsetlgposmodify.lgenModify, bftrace.bfsetlgposmodify.isecModify, bftrace.bfsetlgposmodify.ibModify ); const IFMPPGNO ifmppgno( bftrace.bfsetlgposmodify.ifmp, bftrace.bfsetlgposmodify.pgno ); + if ( !FSamplePage( ifmppgno ) ) + { + return JET_errSuccess; + } + Enforce( lgposModify.FIsSet() ); ModifyPage_( ifmppgno, lgposModify ); diff --git a/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.hxx b/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.hxx index 5f6ea50c..bb067897 100644 --- a/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.hxx +++ b/test/ese/src/devlibtest/resmgr/resmgremulator/rmemulator.hxx @@ -179,6 +179,8 @@ public: class PageEvictionEmulator // ================================================================ { + friend class ResMgrEmulatorBasicTest; // Used to access function FSamplePage from the unit test class ResMgrEmulatorBasicTest. + public: // Possible cache sizing policies. @@ -375,6 +377,8 @@ public: void SetPrintHistograms( const bool fPrintHistograms ); // Whether or not to print histograms. 
ERR ErrSetFaultsHistoRes( const ULONG cFaultsHistoRes ); // See m_cFaultsHistoRes below, default is 1. ERR ErrSetLifetimeHistoRes( const TICK dtickLifetimeHistoRes ); // See m_dtickLifetimeHistoRes below, default is 1. + ERR ErrSetSamplingParameters( const DWORD dwSamplingRatio, // See m_dwSamplingRatio below, default is 1. + const DWORD dwSamplingSeed ); // See m_dwSamplingSeed below, default is 0. // Runs through a BFFTL trace. @@ -472,6 +476,10 @@ private: bool m_fSetLgposModifySupported; // Whether or not the traces being processed support SetLgposModify. + DWORD m_dwSamplingRatio; // Sampling ratio, used to replay the events on 1/m_dwSamplingRatio pages. + + DWORD m_dwSamplingSeed; // Sampling seed, used to 'seed' the subsampling. + // Helpers. void ResetConfig(); // Resets the emulator configuration. @@ -504,6 +512,8 @@ private: const SAMPLE sampleRes, const bool fDumpDetails ); + bool FSamplePage( const IFMPPGNO& ifmppgno ); + // These functions update the state of the pages in the internal emulator // data structures and accumulate statistics. They do not replay the traces. 
diff --git a/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx b/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx index fbd0e068..47da2181 100644 --- a/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx +++ b/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx @@ -64,6 +64,7 @@ class ResMgrEmulatorBasicTest : public UNITTEST ERR ErrFixedChkptDepthModeMediumDepth_( const bool fUseSetLgposModifyTrace ); ERR ErrFixedChkptDepthModeBigDepth_( const bool fUseSetLgposModifyTrace ); ERR ErrFixedChkptDepthModeBigDepthSmallCache_( const bool fUseSetLgposModifyTrace ); + ERR ErrSubSampling_(); }; ResMgrEmulatorBasicTest ResMgrEmulatorBasicTest::s_instance; @@ -133,6 +134,9 @@ ERR ResMgrEmulatorBasicTest::ErrTest() TestCall( ErrFixedChkptDepthModeBigDepth_( true /* fUseSetLgposModifyTrace */ ) ); TestCall( ErrFixedChkptDepthModeBigDepthSmallCache_( true /* fUseSetLgposModifyTrace */ ) ); + // SubSampling + TestCall( ErrSubSampling_() ); + HandleError: return err; } @@ -3956,3 +3960,210 @@ ERR ResMgrEmulatorBasicTest::ErrFixedChkptDepthModeBigDepthSmallCache_( const bo return err; } + +// ================================================================ +ERR ResMgrEmulatorBasicTest::ErrSubSampling_() +// ================================================================ +{ + ERR err = JET_errSuccess; + + printf( "\t%s\r\n", __FUNCTION__ ); + + PageEvictionEmulator& emulator = PageEvictionEmulator::GetEmulatorObj(); + BFFTLContext* pbfftlc = NULL; + PageEvictionAlgorithmLRUTest algorithm; + + // Initialize the sampling in the emulator to be able to count the events that will be sampled. 
+ TestCall( emulator.ErrSetSamplingParameters( 5, 0 ) ); + // Count sampled events in each of the phases of the scenario + int cSampledCache = 0; + int cSampledEvict = 0; + int cSampledCacheB = 0; + int cSampledTouch = 0; + + // Scenario: + // - Init; (1) + // - Cache 100 pages; (101) + // - Evict/scavenge first 95 pages (LRU-1 behavior); (196) + // - Cache pages 91-95; (201) + // - Touch pages 96-100; (206) + // - Term; (207) + // - Sentinel. (208) + + BFTRACE* const rgbftrace = new BFTRACE[208]; + + if ( rgbftrace == NULL ) + { + return NULL; + } + + memset( rgbftrace, 0, 208 * sizeof( BFTRACE ) ); + size_t iTrace = 0; + TICK tick = 0; + + // - Init; (1) + + tick += 200; + rgbftrace[iTrace].tick = tick; + rgbftrace[iTrace].traceid = bftidSysResMgrInit; + rgbftrace[iTrace].bfinit.K = 1; + rgbftrace[iTrace].bfinit.csecCorrelatedTouch = 0.128; + rgbftrace[iTrace].bfinit.csecTimeout = 100.0; + rgbftrace[iTrace].bfinit.csecUncertainty = 0.1; + rgbftrace[iTrace].bfinit.dblHashLoadFactor = 5.0; + rgbftrace[iTrace].bfinit.dblHashUniformity = 1.0; + rgbftrace[iTrace].bfinit.dblSpeedSizeTradeoff = 0.0; + + iTrace++; + + // - Cache 100 pages; (101) + + for ( PGNO pgno = 1; iTrace < 101; iTrace++ ) + { + tick += 200; + rgbftrace[iTrace].tick = tick; + rgbftrace[iTrace].traceid = bftidCache; + BFTRACE::BFCache_* pbfcache = &rgbftrace[iTrace].bfcache; + pbfcache->ifmp = 0; + pbfcache->pgno = pgno; + pbfcache->pctPri = 100; + pbfcache->fUseHistory = true; + pbfcache->fNewPage = false; + if ( emulator.FSamplePage( IFMPPGNO( 0, pgno ) ) ) + { + cSampledCache++; + } + pgno++; + } + + // - Evict/scavenge first 95 pages (LRU-1 behavior); (196) + + for ( PGNO pgno = 1; iTrace < 196; iTrace++ ) + { + tick += 200; + rgbftrace[iTrace].tick = tick; + rgbftrace[iTrace].traceid = bftidEvict; + BFTRACE::BFEvict_* pbfevict = &rgbftrace[iTrace].bfevict; + pbfevict->ifmp = 0; + pbfevict->pgno = pgno; + pbfevict->fCurrentVersion = fTrue; + pbfevict->pctPri = 100; + pbfevict->bfef = 
bfefReasonAvailPool; + if ( emulator.FSamplePage( IFMPPGNO( 0, pgno ) ) ) + { + cSampledEvict++; + } + pgno++; + } + + // - Cache pages 91-95; (201) + + for ( PGNO pgno = 91; iTrace < 201; iTrace++ ) + { + tick += 200; + rgbftrace[iTrace].tick = tick; + rgbftrace[iTrace].traceid = bftidCache; + BFTRACE::BFCache_* pbfcache = &rgbftrace[iTrace].bfcache; + pbfcache->ifmp = 0; + pbfcache->pgno = pgno; + pbfcache->pctPri = 100; + pbfcache->fUseHistory = true; + pbfcache->fNewPage = false; + if ( emulator.FSamplePage( IFMPPGNO( 0, pgno ) ) ) + { + cSampledCacheB++; + } + pgno++; + } + + // - Touch pages 96-100; (206) + + for ( PGNO pgno = 96; iTrace < 206; iTrace++ ) + { + tick += 200; + rgbftrace[iTrace].tick = tick; + rgbftrace[iTrace].traceid = bftidTouch; + BFTRACE::BFTouch_* pbftouch = &rgbftrace[iTrace].bftouch; + pbftouch->ifmp = 0; + pbftouch->pgno = pgno; + pbftouch->pctPri = 100; + pbftouch->fUseHistory = true; + if ( emulator.FSamplePage( IFMPPGNO( 0, pgno ) ) ) + { + cSampledTouch++; + } + pgno++; + } + + // - Term; (207) + + tick += 200; + rgbftrace[iTrace].tick = tick; + rgbftrace[iTrace].traceid = bftidSysResMgrTerm; + iTrace++; + + // - Sentinel. (208) + + rgbftrace[iTrace].traceid = bftidInvalid; + // End creation of trace + + TestCheck( rgbftrace != NULL ); + + // Init driver. + + TestCall( ErrBFFTLInit( rgbftrace, fBFFTLDriverTestMode, &pbfftlc ) ); + + // Fill in database info. + + pbfftlc->cIFMP = 1; + pbfftlc->rgpgnoMax[0] = 100; + + // Init./run. + + TestCall( emulator.ErrSetCacheSize( PageEvictionEmulator::peecspVariable ) ); + TestCall( emulator.ErrInit( pbfftlc, &algorithm ) ); + TestCall( emulator.ErrExecute() ); + + // Validation. 
+ + const PageEvictionEmulator::STATS_AGG& stats = emulator.GetStats(); + TestCall( emulator.ErrDumpStats( false ) ); + + TestCheck( stats.cpgCachedMax == cSampledCache ); + TestCheck( stats.cRequestedUnique == cSampledCache ); + TestCheck( stats.cRequested == ( cSampledCache + cSampledCacheB + cSampledTouch ) ); + TestCheck( stats.cResMgrCycles == 1 ); + TestCheck( stats.cResMgrAbruptCycles == 0 ); + TestCheck( stats.cDiscardedTraces == 0 ); + TestCheck( stats.cOutOfRangeTraces == 0 ); + TestCheck( stats.cFaultsReal == ( cSampledCache + cSampledCacheB ) ); + TestCheck( stats.cFaultsSim == ( cSampledCache + cSampledCacheB ) ); + TestCheck( stats.cFaultsRealAvoidable == cSampledCacheB ); + TestCheck( stats.cFaultsSimAvoidable == cSampledCacheB ); + TestCheck( stats.cTouchesReal == cSampledTouch ); + TestCheck( stats.cTouchesSim == cSampledTouch ); + TestCheck( stats.cCaches == ( cSampledCacheB + cSampledCache ) ); + TestCheck( stats.cTouches == cSampledTouch ); + TestCheck( stats.cCachesTurnedTouch == 0 ); + TestCheck( stats.cTouchesTurnedCache == 0 ); + TestCheck( stats.cEvictionsReal == cSampledEvict ); + TestCheck( stats.cEvictionsSim == ( cSampledEvict + cSampledCacheB + cSampledTouch ) ); + TestCheck( stats.cEvictionsFailed == 0 ); + TestCheck( stats.cEvictionsCacheTooBig == 0 ); + TestCheck( stats.cEvictionsCacheTooOld == 0 ); + TestCheck( stats.cEvictionsPurge == ( cSampledCacheB + cSampledTouch ) ); + + TestCheck( stats.pctCacheFaultRateReal == 100.0 * ( cSampledCache + cSampledCacheB ) / ( cSampledCache + cSampledCacheB + cSampledTouch ) ); + TestCheck( stats.pctCacheFaultRateSim == 100.0 * ( cSampledCache + cSampledCacheB ) / ( cSampledCache + cSampledCacheB + cSampledTouch ) ); + TestCheck( stats.pctCacheFaultRateRealAvoidable == 100.0 * cSampledCacheB / ( cSampledCache + cSampledCacheB + cSampledTouch ) ); + TestCheck( stats.pctCacheFaultRateSimAvoidable == 100.0 * cSampledCacheB / ( cSampledCache + cSampledCacheB + cSampledTouch ) ); + TestCheck( 
stats.pctCacheSizeRatioSim == -1.0 ); + +HandleError: + + emulator.Term(); + BFFTLTerm( pbfftlc ); + delete[] rgbftrace; + + return err; +} \ No newline at end of file diff --git a/test/ese/src/devlibtest/resmgr/resmgremulatorunit/negative.cxx b/test/ese/src/devlibtest/resmgr/resmgremulatorunit/negative.cxx index b191872c..5db0a752 100644 --- a/test/ese/src/devlibtest/resmgr/resmgremulatorunit/negative.cxx +++ b/test/ese/src/devlibtest/resmgr/resmgremulatorunit/negative.cxx @@ -36,6 +36,7 @@ class ResMgrEmulatorNegativeTest : public UNITTEST ERR ErrDumpStatsNoInit_(); ERR ErrDumpStatsNoExecute_(); ERR ErrDumpStatsTerm_(); + ERR ErrSubSamplingInvalidParams_(); }; ResMgrEmulatorNegativeTest ResMgrEmulatorNegativeTest::s_instance; @@ -62,6 +63,7 @@ ERR ResMgrEmulatorNegativeTest::ErrTest() TestCall( ErrDumpStatsNoInit_() ); TestCall( ErrDumpStatsNoExecute_() ); TestCall( ErrDumpStatsTerm_() ); + TestCall( ErrSubSamplingInvalidParams_() ); HandleError: return err; @@ -276,3 +278,28 @@ ERR ResMgrEmulatorNegativeTest::ErrDumpStatsTerm_() return err; } +// ================================================================ +ERR ResMgrEmulatorNegativeTest::ErrSubSamplingInvalidParams_() +// ================================================================ +{ + ERR err = JET_errSuccess; + + printf( "\t%s\r\n", __FUNCTION__ ); + + PageEvictionEmulator& emulator = PageEvictionEmulator::GetEmulatorObj(); + BFFTLContext* pbfftlc = NULL; + BFTRACE bftrace = { 0 }; + bftrace.traceid = bftidInvalid; + PageEvictionAlgorithmLRUTest algorithm; + + TestCall( ErrBFFTLInit( &bftrace, fBFFTLDriverTestMode, &pbfftlc ) ); + + TestCheck( emulator.ErrSetSamplingParameters( 0, 0 ) == JET_errInvalidParameter ); + +HandleError: + + BFFTLTerm( pbfftlc ); + + return err; +} + diff --git a/test/ese/src/devlibtest/resmgr/resmgrreplay/main.cxx b/test/ese/src/devlibtest/resmgr/resmgrreplay/main.cxx index 97e7811b..ab00b82c 100644 --- a/test/ese/src/devlibtest/resmgr/resmgrreplay/main.cxx +++ 
b/test/ese/src/devlibtest/resmgr/resmgrreplay/main.cxx @@ -44,6 +44,10 @@ static void PrintHelp( const WCHAR * const wszApplication ) fprintf( stderr, " /PrintSamples : prints out simulation samples, in sec (default: do not print samples). (/Emulate only)\n" ); fprintf( stderr, " /NoHisto: do not print out histograms. (default: print histograns). (/Emulate only)\n" ); fprintf( stderr, " /Ifmp : specific IFMP to report results on (default: all IFMPs). Note, the simulation is always performed with all IFMPs. (/Emulate only)\n" ); + fprintf( stderr, " /SubSampling : Subsample events by keeping on average one event out of event. is used to change the set of subsampled pages.\n" ); + fprintf( stderr, " Note that to estimate the performance of the cache using a subsampled trace, the cache size must also be scaled by 1/.\n" ); + fprintf( stderr, " Similarly, event counters in the statistics must be multiplied by to estimate their values under the full workload.\n" ); + fprintf( stderr, " must be at least 1, must be positive. 
(/Emulate only)\n" ); } // ================================================================ @@ -159,6 +163,8 @@ INT _cdecl wmain( INT argc, __in_ecount(argc) LPWSTR argv[] ) Alloc( pids ); cacheSizes = new std::set(); Alloc( cacheSizes ); + DWORD dwSamplingRatio = 1; + DWORD dwSamplingSeed = 1; if ( ( 0 == _wcsicmp( argv[1], L"/Ftl" ) ) || ( 0 == _wcsicmp( argv[1], L"-Ftl" ) ) ) { @@ -634,6 +640,25 @@ INT _cdecl wmain( INT argc, __in_ecount(argc) LPWSTR argv[] ) Error( ErrERRCheck( JET_errInvalidParameter ) ); } } + else if ( 0 == _wcsicmp( argv[iarg], L"/SubSampling" ) || 0 == _wcsicmp( argv[iarg], L"-SubSampling" ) ) + { + if ( ( iarg + 2 ) < argc ) + { + dwSamplingRatio = (DWORD)_wtoi( argv[iarg + 1] ); + dwSamplingSeed = (DWORD)_wtoi( argv[iarg + 2] ); + iarg += 2; + if ( dwSamplingRatio < 1 ) + { + wprintf( L"Invalid sampling options.\n" ); + Error( ErrERRCheck( JET_errInvalidParameter ) ); + } + } + else + { + wprintf( L"Insufficient number of arguments for /SubSampling option.\n" ); + Error( ErrERRCheck( JET_errInvalidParameter ) ); + } + } else { wprintf( L"Invalid argument: %ws.\n", argv[iarg] ); @@ -888,6 +913,7 @@ INT _cdecl wmain( INT argc, __in_ecount(argc) LPWSTR argv[] ) emulator.SetPrintHistograms( fPrintHistograms ); Call( emulator.ErrSetFaultsHistoRes( cCachedHistoRes ) ); Call( emulator.ErrSetLifetimeHistoRes( dtickLifetimeHistoRes ) ); + Call( emulator.ErrSetSamplingParameters( dwSamplingRatio, dwSamplingSeed ) ); Call( emulator.ErrInit( pbfftlc, pipea ) ); @@ -1019,6 +1045,7 @@ INT _cdecl wmain( INT argc, __in_ecount(argc) LPWSTR argv[] ) emulator.SetPrintHistograms( fPrintHistograms ); Call( emulator.ErrSetFaultsHistoRes( cCachedHistoRes ) ); Call( emulator.ErrSetLifetimeHistoRes( dtickLifetimeHistoRes ) ); + Call( emulator.ErrSetSamplingParameters( dwSamplingRatio, dwSamplingSeed ) ); if ( ( rmmode == rmemCacheSizeIteration ) || ( rmmode == rmemCacheSizeFixedIteration ) || ( rmmode == rmemCacheSizeIterationAvoidable ) || ( rmmode == 
rmemCacheFaultIteration ) || ( rmmode == rmemCacheFaultIterationAvoidable ) ) From 7f7ca2c74be832673065cf22272044ff09109fe4 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Tue, 16 Aug 2022 19:00:00 +0000 Subject: [PATCH 004/102] OSSBUILD: Uninit variable defense is stronger in github test code. --- .../resmgr/resmgremulatorunit/basic.cxx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx b/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx index 47da2181..894bacd8 100644 --- a/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx +++ b/test/ese/src/devlibtest/resmgr/resmgremulatorunit/basic.cxx @@ -3973,14 +3973,6 @@ ERR ResMgrEmulatorBasicTest::ErrSubSampling_() BFFTLContext* pbfftlc = NULL; PageEvictionAlgorithmLRUTest algorithm; - // Initialize the sampling in the emulator to be able to count the events that will be sampled. - TestCall( emulator.ErrSetSamplingParameters( 5, 0 ) ); - // Count sampled events in each of the phases of the scenario - int cSampledCache = 0; - int cSampledEvict = 0; - int cSampledCacheB = 0; - int cSampledTouch = 0; - // Scenario: // - Init; (1) // - Cache 100 pages; (101) @@ -4001,6 +3993,14 @@ ERR ResMgrEmulatorBasicTest::ErrSubSampling_() size_t iTrace = 0; TICK tick = 0; + // Initialize the sampling in the emulator to be able to count the events that will be sampled. + TestCall( emulator.ErrSetSamplingParameters( 5, 0 ) ); + // Count sampled events in each of the phases of the scenario + int cSampledCache = 0; + int cSampledEvict = 0; + int cSampledCacheB = 0; + int cSampledTouch = 0; + // - Init; (1) tick += 200; From c9f4f30ea9a8bec72c71903aaacf59165dd4202d Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Wed, 17 Aug 2022 16:40:44 +0000 Subject: [PATCH 005/102] Replace buggy .NET Framework event log reader with our own implementation. "If there aren't any good wheels around, just invent your own." - Unknown author. 
We've had a C implementation of an event log reader since the days when we would mostly write tests in native code, so this change implements an interop layer on top of that implementation and replaces the System.Diagnostics.EventLog uses throughout our test code. That class has a few bugs that have called for ugly workarounds, which have now been removed. [Substrate:b8a242c84f7d5ce27da55d5bc80cda1b8b42693d] --- .../src/blue/src/esetest/esetest/eventlog.cxx | 30 +++++++++++-------- test/ese/src/inc/esetest.h | 2 ++ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/test/ese/src/blue/src/esetest/esetest/eventlog.cxx b/test/ese/src/blue/src/esetest/esetest/eventlog.cxx index 55b09d6f..c4ab6282 100644 --- a/test/ese/src/blue/src/esetest/esetest/eventlog.cxx +++ b/test/ese/src/blue/src/esetest/esetest/eventlog.cxx @@ -17,7 +17,7 @@ const wchar_t * const g_mwszzAdvapi32CoreSystemBroken = wszAdvapi32 L"\0"; typedef struct{ - PFNEVENTLOGGING pfnCallback; + PFNEVENTLOGGING pfnCallback; HANDLE hEventLog; PWSTR* pwszEventSources; size_t cEventSources; @@ -34,11 +34,7 @@ typedef struct{ PBYTE pBuffer; DWORD cbBufferSize; HANDLE hObjects[ 2 ]; - - // Won't be used after initialization. For debugging purposes only. PWSTR wszEventLog; - SYSTEMTIME stTimeMin; - SYSTEMTIME stTimeMax; PWSTR wszLogFile; BOOL fThreadActive; } EseEventLoggingQuery; @@ -78,6 +74,7 @@ IEventLoggingProcessEvents( DWORD cbNeed; PBYTE pBufferAux, pBuffer2; EVENTLOGRECORD* pelg; + DWORD dwRecordNumber; PWSTR wszEventSource; DWORD dwTimeGenerated; WORD wEventType; @@ -123,6 +120,7 @@ IEventLoggingProcessEvents( // Process all the events returned. while ( ( ( DWORD )( pBufferAux - hQuery->pBuffer ) ) < cbRead ){ pelg = ( EVENTLOGRECORD* )pBufferAux; + dwRecordNumber = pelg->RecordNumber; wszEventSource = ( WCHAR* )( pelg + 1 ); dwTimeGenerated = pelg->TimeGenerated; wEventType = pelg->EventType; @@ -218,6 +216,7 @@ IEventLoggingProcessEvents( // Callback. 
if ( hQuery->pfnCallback ){ hQuery->pfnCallback( hQuery->wszEventLog, + dwRecordNumber, wszEventSource, &stTimeGenerated, wEventType, @@ -238,6 +237,10 @@ IEventLoggingProcessEvents( ESETEST_ELHELPER_STRING_BUFFER_SIZE, "Event log: %S" CRLF, hQuery->wszEventLog ); IEventLoggingPrintToFile( hQuery->hFile, szBuffer ); + sprintf_s( szBuffer, + ESETEST_ELHELPER_STRING_BUFFER_SIZE, + "Event record number: %lu" CRLF, dwRecordNumber ); + IEventLoggingPrintToFile( hQuery->hFile, szBuffer ); sprintf_s( szBuffer, ESETEST_ELHELPER_STRING_BUFFER_SIZE, "Event source: %S" CRLF, wszEventSource ); @@ -372,12 +375,12 @@ IEventLoggingBackgroundListening( HANDLE EventLoggingCreateQuery( - __in_opt PFNEVENTLOGGING pfnCallback, + __in_opt PFNEVENTLOGGING pfnCallback, __in_opt PCWSTR wszEventLog, __in_ecount_opt( cEventSources ) PCWSTR* pwszEventSources, _In_ size_t cEventSources, - __in_opt PSYSTEMTIME pTimeMin, - __in_opt PSYSTEMTIME pTimeMax, + __in_opt PSYSTEMTIME pTimeMin, + __in_opt PSYSTEMTIME pTimeMax, __in_ecount_opt( cEventTypes ) PWORD pEventTypes, _In_ size_t cEventTypes, __in_ecount_opt( cEventCategories ) PWORD pEventCategories, @@ -476,11 +479,9 @@ EventLoggingCreateQuery( if ( NULL != pTimeMin ){ hQuery->dwTimeMin = SystemTimeToSecondsSince1970( pTimeMin ); } - SecondsSince1970ToSystemTime( hQuery->dwTimeMin, &hQuery->stTimeMin ); if ( NULL != pTimeMax ){ hQuery->dwTimeMax = SystemTimeToSecondsSince1970( pTimeMax ); } - SecondsSince1970ToSystemTime( hQuery->dwTimeMax, &hQuery->stTimeMax ); // Event type filters. 
if ( NULL != pEventTypes ){ @@ -727,6 +728,7 @@ EventLoggingModuleFromEventSource( VOID EventLoggingPrintEvent( _In_ PCWSTR wszEventLog, + _In_ DWORD dwRecordNumber, _In_ PCWSTR wszEventSource, _In_ PSYSTEMTIME pTimeGenerated, _In_ WORD wEventType, @@ -744,6 +746,7 @@ EventLoggingPrintEvent( tprintf( CRLF ); tprintf( "Event log: %S" CRLF, wszEventLog ); + tprintf( "Event record number: %lu" CRLF, dwRecordNumber ); tprintf( "Event source: %S" CRLF, wszEventSource ); tprintf( "Time generated: %04u/%02u/%02u %02u:%02u:%02u" CRLF, pTimeGenerated->wYear, pTimeGenerated->wMonth, pTimeGenerated->wDay, @@ -773,12 +776,12 @@ EventLoggingPrintEvent( //Windows Phone HANDLE EventLoggingCreateQuery( - __in_opt PFNEVENTLOGGING pfnCallback, + __in_opt PFNEVENTLOGGING pfnCallback, __in_opt PCWSTR wszEventLog, __in_ecount_opt( cEventSources ) PCWSTR* pwszEventSources, _In_ size_t cEventSources, - __in_opt PSYSTEMTIME pTimeMin, - __in_opt PSYSTEMTIME pTimeMax, + __in_opt PSYSTEMTIME pTimeMin, + __in_opt PSYSTEMTIME pTimeMax, __in_ecount_opt( cEventTypes ) PWORD pEventTypes, _In_ size_t cEventTypes, __in_ecount_opt( cEventCategories ) PWORD pEventCategories, @@ -822,6 +825,7 @@ EventLoggingModuleFromEventSource( VOID EventLoggingPrintEvent( _In_ PCWSTR wszEventLog, + _In_ DWORD dwRecordNumber, _In_ PCWSTR wszEventSource, _In_ PSYSTEMTIME pTimeGenerated, _In_ WORD wEventType, diff --git a/test/ese/src/inc/esetest.h b/test/ese/src/inc/esetest.h index da32fd46..b146d0e4 100644 --- a/test/ese/src/inc/esetest.h +++ b/test/ese/src/inc/esetest.h @@ -1841,6 +1841,7 @@ void PerfCollectorStopAndReport( HANDLE hCollector, const char* const szLabel ); typedef VOID (__stdcall *PFNEVENTLOGGING) ( PWSTR wszEventLog, + DWORD dwRecordNumber, PWSTR wszEventSource, PSYSTEMTIME pTimeGenerated, WORD wEventType, @@ -1947,6 +1948,7 @@ EventLoggingModuleFromEventSource( VOID EventLoggingPrintEvent( _In_ PCWSTR wszEventLog, + _In_ DWORD dwRecordNumber, _In_ PCWSTR wszEventSource, _In_ PSYSTEMTIME 
pTimeGenerated, _In_ WORD wEventType, From c60f4b2907a0e18fcd2c85e24288a6a5718f8bb6 Mon Sep 17 00:00:00 2001 From: Vakishan Date: Wed, 24 Aug 2022 01:33:41 +0000 Subject: [PATCH 006/102] Log extent freed when an allocated extent is rolled back to help reconcile page if it is within required range [Substrate:50d6edd67b3d62b8d5d2e4f5ce69c31cd4b62a42] --- dev/ese/src/ese/dbshrink.cxx | 2 +- dev/ese/src/ese/space.cxx | 18 +++++++++--------- dev/ese/src/ese/ver.cxx | 15 +++++++++++++-- dev/ese/src/inc/space.hxx | 4 +++- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index 32c2c8a8..f8ec81ac 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -1923,7 +1923,7 @@ LOCAL ERR ErrSHKIRootMoveCheck( const ROOTMOVE& rm, FUCB* const pfucb, const OBJ pfucbChild->pcsrRoot = Pcsr( pfucbChild ); // Check against the previously enumerated objected. - if ( PgnoSPIParentFDP( pfucbChild ) != rm.pgnoNewFDP ) + if ( PgnoSPParentFDP( pfucbChild ) != rm.pgnoNewFDP ) { AssertTrack( fFalse, "RootMoveBadPgnoParentFdp" ); Error( ErrERRCheck( JET_errDatabaseCorrupted ) ); diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index 9be172d1..23ec47f2 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -2567,7 +2567,7 @@ INLINE const SPACE_HEADER * PsphSPIRootPage( FUCB* pfucb ) // get pgnoFDP of parentFDP of this tree // -PGNO PgnoSPIParentFDP( FUCB *pfucb ) +PGNO PgnoSPParentFDP( FUCB *pfucb ) { return PsphSPIRootPage( pfucb )->PgnoParent(); } @@ -7466,7 +7466,7 @@ LOCAL ERR ErrSPIFreeSEToParent( // get parentFDP's root pgno // cursor passed in should be at root of tree // so we can access pgnoParentFDP from the external header - const PGNO pgnoParentFDP = PgnoSPIParentFDP( pfucb ); + const PGNO pgnoParentFDP = PgnoSPParentFDP( pfucb ); if ( pgnoParentFDP == pgnoNull ) { // This is the root DB and its parent is the file system, so there's nothing to release 
@@ -8006,7 +8006,7 @@ ERR ErrSPIAERemoveInsertionRegion( return err; } -LOCAL VOID SPIReportSpaceLeak( _In_ const FUCB* const pfucb, _In_ const ERR err, _In_ const PGNO pgnoFirst, _In_ const CPG cpg, __in_z const CHAR* const szTag ) +VOID SPReportSpaceLeak( _In_ const FUCB* const pfucb, _In_ const ERR err, _In_ const PGNO pgnoFirst, _In_ const CPG cpg, __in_z const CHAR* const szTag ) { Assert( pfucb != NULL ); Expected( err < JET_errSuccess ); @@ -8583,7 +8583,7 @@ ERR ErrSPCaptureNonRevertableFDPRootPage( PIB *ppib, FCB* pfcbFDPToFree, const P HandleError: if ( err < JET_errSuccess ) { - SPIReportSpaceLeak( pfucb, err, pfcbFDPToFree->PgnoFDP(), 1, "CaptureNonRevertableFDPRootPage" ); + SPReportSpaceLeak( pfucb, err, pfcbFDPToFree->PgnoFDP(), 1, "CaptureNonRevertableFDPRootPage" ); } if ( pfucbNil != pfucb ) @@ -8681,7 +8681,7 @@ ERR ErrSPCaptureSpaceTreePages( FUCB* const pfucbParent, FCB* pfcb, CPG* pcpgSna HandleError: if ( err < JET_errSuccess ) { - SPIReportSpaceLeak( pfucbOE, err, pgnoFirst, cpgExtent, "CaptureSpaceTreePages" ); + SPReportSpaceLeak( pfucbOE, err, pgnoFirst, cpgExtent, "CaptureSpaceTreePages" ); } if ( pfucbOE != pfucbNil ) @@ -8940,7 +8940,7 @@ ERR ErrSPFreeExt( FUCB* const pfucb, const PGNO pgnoFirst, const CPG cpgSize, co err, err ) ); - SPIReportSpaceLeak( pfucb, err, pgnoFirst, cpgSize, szTag ); + SPReportSpaceLeak( pfucb, err, pgnoFirst, cpgSize, szTag ); } else { @@ -10931,7 +10931,7 @@ ERR ErrSPFreeFDP( // get parent FDP pgno // - Assert( pgnoFDPParent == PgnoSPIParentFDP( pfucb ) ); + Assert( pgnoFDPParent == PgnoSPParentFDP( pfucb ) ); Assert( pgnoFDPParent == PgnoFDP( pfucbParent ) ); if ( !pfucb->u.pfcb->FSpaceInitialized() ) @@ -11468,7 +11468,7 @@ LOCAL ERR ErrSPIAddSecondaryExtent( Assert( ( err < JET_errSuccess ) || ( fAddedToOwnExt && fAddedToAvailExt ) ); if ( fAddedToOwnExt && !fAddedToAvailExt ) { - SPIReportSpaceLeak( pfucb, err, pgnoLast - cpgAvailable + 1, cpgAvailable, "NewExt" ); + SPReportSpaceLeak( pfucb, err, 
pgnoLast - cpgAvailable + 1, cpgAvailable, "NewExt" ); } return err; @@ -13519,7 +13519,7 @@ LOCAL ERR ErrSPIReserveSPBufPages( if ( !fExtentFreed ) { - SPIReportSpaceLeak( pfucb, err, extinfo.PgnoFirst(), (CPG)extinfo.CpgExtent(), "SpBuffer" ); + SPReportSpaceLeak( pfucb, err, extinfo.PgnoFirst(), (CPG)extinfo.CpgExtent(), "SpBuffer" ); } } diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index ad3a1897..bcb2b734 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -5157,8 +5157,19 @@ LOCAL VOID VERIFreeExt( PIB * const ppib, FCB *pfcb, PGNO pgnoFirst, CPG cpg ) const BOOL fCleanUpStateSavedSavedSaved = FOSSetCleanupState( fFalse ); - (VOID)ErrSPFreeExt( pfucb, pgnoFirst, cpg, "VerFreeExt" ); - + // Free extent only only after logging extent freed. If not, we might not capture the fact that these pages need to be reconciled if in required range and page is in dbtimeRevert. + // We will also mark the extent empty to avoid re-capturing page preimage. + err = ErrSPCaptureSnapshot( pfucb, pgnoFirst, cpg, fTrue ); + + if ( err >= JET_errSuccess ) + { + (VOID)ErrSPFreeExt( pfucb, pgnoFirst, cpg, "VerFreeExt" ); + } + else + { + SPReportSpaceLeak( pfucb, err, pgnoFirst, cpg, "VerFreeExt" ); + } + // Restore cleanup checking FOSSetCleanupState( fCleanUpStateSavedSavedSaved ); diff --git a/dev/ese/src/inc/space.hxx b/dev/ese/src/inc/space.hxx index 222e5c85..04136208 100644 --- a/dev/ese/src/inc/space.hxx +++ b/dev/ese/src/inc/space.hxx @@ -437,7 +437,9 @@ INLINE BOOL FSPExpectedError( const ERR err ) BOOL FSPIsRootSpaceTree( const FUCB * const pfucb ); -PGNO PgnoSPIParentFDP( FUCB *pfucb ); +PGNO PgnoSPParentFDP( FUCB *pfucb ); + +VOID SPReportSpaceLeak( _In_ const FUCB* const pfucb, _In_ const ERR err, _In_ const PGNO pgnoFirst, _In_ const CPG cpg, __in_z const CHAR* const szTag ); // space Manager constants const INT cSecFrac = 4; // divider of primary extent to get secondary From 4e9750e36036a47030c6852150dddf642bae9730 Mon Sep 17 
00:00:00 2001 From: Anil Ruia Date: Thu, 25 Aug 2022 16:16:43 +0000 Subject: [PATCH 007/102] Improve detection of whether transaction is actually holding back version store cleanup. Also, trigger version store cleanup task when a read-only transaction checks for max transaction size [Substrate:3afad5cd480190a23d9d35c45c87488f103f46dc] --- dev/ese/src/ese/ver.cxx | 16 ++++++++++++++-- dev/ese/src/inc/ver.hxx | 13 ++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index bcb2b734..ba0fb266 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -188,6 +188,8 @@ VER::VER( INST *pinst ) (INT)UlParam(pinst, JET_paramVersionStoreTaskQueueMax), ctasksPerBatchMaxDefault, ctasksBatchedMaxDefault ), + m_fAboveMaxTransactionSize( fFalse ), + m_trxOldestRCE( trxMin ), m_cresBucket( pinst ) { @@ -1758,6 +1760,10 @@ ERR VER::ErrVERICreateRCE( } Call( ErrVERIAllocateRCE( cbNewRCE, &prce, uiHashConcurrentOp ) ); + if ( m_trxOldestRCE == trxMin ) + { + m_trxOldestRCE = trxBegin0; + } #ifdef DEBUG if ( !PinstFromIfmp( pfcb->Ifmp() )->m_plog->FRecovering() ) @@ -4412,11 +4418,11 @@ ERR VER::ErrVERCheckTransactionSize( PIB * const ppib ) ERR err = JET_errSuccess; if ( m_fAboveMaxTransactionSize ) { - UpdateCachedTrxOldest( m_pinst ); + VERSignalCleanup(); // If this is the oldest transaction and the version store is too // full, return an error - if ( ppib->trxBegin0 == TrxOldestCached( m_pinst ) ) + if ( TrxCmp( ppib->trxBegin0, m_trxOldestRCE ) <= 0 ) { const BOOL fCleanupWasRun = m_msigRCECleanPerformedRecently.FWait( cmsecAsyncBackgroundCleanup ); @@ -6149,6 +6155,11 @@ ERR VER::ErrVERIRCEClean( const IFMP ifmp ) const TRX trxRCECommitted = prce->TrxCommitted(); BOOL fCleanable = fFalse; + if ( !fCleanOneDb ) + { + m_trxOldestRCE = fFullyCommitted ? trxRCECommitted : prce->TrxBegin0(); + } + if ( trxMax == trxOldest ) { // trxOldest may no longer be trxMax.
if so we may not be able to @@ -6344,6 +6355,7 @@ ERR VER::ErrVERIRCEClean( const IFMP ifmp ) } else { + m_trxOldestRCE = trxMin; Assert( pbucketNil == m_pbucketGlobalTail ); } m_critBucketGlobal.Leave(); diff --git a/dev/ese/src/inc/ver.hxx b/dev/ese/src/inc/ver.hxx index b1cf2b05..e12c6dc1 100644 --- a/dev/ese/src/inc/ver.hxx +++ b/dev/ese/src/inc/ver.hxx @@ -827,7 +827,7 @@ INLINE RCEID RCE::Rceid() const INLINE TRX RCE::TrxBegin0 () const // ================================================================ { - Assert( FAssertReadable_() ); + Assert( FIsRCECleanup() || FAssertReadable_() ); return m_trxBegin0; } @@ -1626,6 +1626,7 @@ public: RECTASKBATCHER m_rectaskbatcher; BOOL m_fAboveMaxTransactionSize; + TRX m_trxOldestRCE; public: #ifdef VERPERF @@ -1753,6 +1754,16 @@ public: INLINE RCE **PGetChain( UINT ui ); INLINE VOID SetChain( UINT ui, RCE * ); + BOOL FAboveMaxTransactionSize() const + { + return m_fAboveMaxTransactionSize; + } + + TRX TrxOldestRCE() const + { + return m_trxOldestRCE; + } + #ifdef RTM #else public: From ac5fc2f823ae8c37ead8c94ed145003a549f6123 Mon Sep 17 00:00:00 2001 From: Vakishan Date: Thu, 25 Aug 2022 20:54:05 +0000 Subject: [PATCH 008/102] Adjust loose truncation threshold for RBS and add max RBS space threshold 1. We will truncate RBS if we use more than 300GB of disk space. 2. We will also start loose truncation at a higher threshold of 200GB i.e., if we have less than 200GB of disk space left we will start truncating the snapshots.
[Substrate:906a927437029057120afd6c152b744c39db0544] --- dev/ese/src/ese/rbscleaner_test.cxx | 71 +++++++++++++++++++++++++++++ dev/ese/src/ese/revertsnapshot.cxx | 23 ++++++++-- dev/ese/src/inc/revertsnapshot.h | 10 ++++ 3 files changed, 101 insertions(+), 3 deletions(-) diff --git a/dev/ese/src/ese/rbscleaner_test.cxx b/dev/ese/src/ese/rbscleaner_test.cxx index f3f4e9be..d2978d09 100644 --- a/dev/ese/src/ese/rbscleaner_test.cxx +++ b/dev/ese/src/ese/rbscleaner_test.cxx @@ -47,6 +47,9 @@ class RBSCleanerTestConfig : public IRBSCleanerConfig QWORD CbMaxSpaceForRBSWhenLowDiskSpace() { return m_cbMaxSpaceForRBSWhenLowDiskSpace; } VOID SetCbMaxSpaceForRBSWhenLowDiskSpace( QWORD cbMaxSpaceForRBSWhenLowDiskSpace ) { m_cbMaxSpaceForRBSWhenLowDiskSpace = cbMaxSpaceForRBSWhenLowDiskSpace; } + QWORD CbMaxSpaceForRBS() { return m_cbMaxSpaceForRBS; } + VOID SetCbMaxSpaceForRBS( QWORD cbMaxSpaceForRBS ) { m_cbMaxSpaceForRBS = cbMaxSpaceForRBS; } + INT CSecRBSMaxTimeSpan() { return m_cSecRBSMaxTimeSpan; } VOID SetCSecRBSMaxTimeSpan( INT cSecRBSMaxTimeSpan ) { m_cSecRBSMaxTimeSpan = cSecRBSMaxTimeSpan; } @@ -62,6 +65,7 @@ class RBSCleanerTestConfig : public IRBSCleanerConfig m_fEnableCleanup = fTrue; m_cbLowDiskSpaceThreshold = 1073741824; // 1GB m_cbMaxSpaceForRBSWhenLowDiskSpace = 1048576; // 1MB + m_cbMaxSpaceForRBS = 2147483648; // 2GB, same as disk size by default. 
m_cSecRBSMaxTimeSpan = 300; // 5mins m_cSecMinCleanupIntervalTime = 1; // every 1sec m_lFirstValidRBSGen = 1; @@ -74,6 +78,7 @@ class RBSCleanerTestConfig : public IRBSCleanerConfig QWORD m_cbLowDiskSpaceThreshold; QWORD m_cbLowDiskSpaceDisableRBSThreshold; QWORD m_cbMaxSpaceForRBSWhenLowDiskSpace; + QWORD m_cbMaxSpaceForRBS; INT m_cSecRBSMaxTimeSpan; INT m_cSecMinCleanupIntervalTime; LONG m_lFirstValidRBSGen; @@ -853,3 +858,69 @@ JETUNITTEST( RBSCleaner, ExpiredBackupSnapshotsRemoved ) CHECKCALLS( JetTerm2( (JET_INSTANCE) pinst, JET_bitTermAbrupt ) ); } + +// Max space of RBS reached and we have to remove multiple RBS files to free up space +JETUNITTEST( RBSCleaner, MaxRBSSpaceRequiringMultipleRBSRemoval ) +{ + __int64 ftStartTime = UtilGetCurrentFileTime(); + unique_ptr pconfig( new RBSCleanerTestConfig() ); + pconfig->SetCSecRBSMaxTimeSpan( 3600 ); + + RBSCleanerTestState* pstate = new RBSCleanerTestState(); + RBSCleanerTestIOOperator* piooperator = new RBSCleanerTestIOOperator(); + piooperator->m_lRBSGenMin = 1; + piooperator->m_lRBSGenMax = 10; + + // Configure rbs disk space threshold such that we are consuming extra space and multiple RBS snapshots need to be removed. 
+ pconfig->SetCbMaxSpaceForRBS( 2 * piooperator->m_cbDirSize ); + + INST* pinst; + CHECKCALLS( JetCreateInstance2W( (JET_INSTANCE*)&pinst, NULL, NULL, JET_bitNil ) ); + CHECKCALLS( JetSetSystemParameterW( (JET_INSTANCE*)&pinst, JET_sesidNil, JET_paramEnableRBS, 1, NULL ) ); + + unique_ptr prbscleaner( new RBSCleaner( pinst, piooperator, pstate, pconfig.release() ) ); + CHECK( JET_errSuccess == prbscleaner->ErrStartCleaner() ); + + SleepTillConditionSatisfied( pstate->CPassesFinished() == 1, 2, MaxTestRunTimeInMSec ); + + CHECK( pstate->FtPassStartTime() >= ftStartTime ); + CHECK( pstate->FtPrevPassCompletionTime() >= pstate->FtPassStartTime() ); + CHECK( piooperator->m_cRemoveFolderCalls == 8 ); + CHECK( piooperator->m_lRBSGenMin == 9 ); + + CHECKCALLS( JetTerm2( (JET_INSTANCE)pinst, JET_bitTermAbrupt ) ); +} + +// Low disk space but there is space occupied by backup RBS which could be removed. But that's not enough and we need to remove one more RBS. +JETUNITTEST( RBSCleaner, MaxRBSSpaceBackupRBSRemovalNotEnough ) +{ + __int64 ftStartTime = UtilGetCurrentFileTime(); + unique_ptr pconfig( new RBSCleanerTestConfig() ); + pconfig->SetCSecRBSMaxTimeSpan( 3600 ); + + RBSCleanerTestState* pstate = new RBSCleanerTestState(); + RBSCleanerTestIOOperator* piooperator = new RBSCleanerTestIOOperator(); + piooperator->m_lRBSGenMin = 1; + piooperator->m_lRBSGenMax = 10; + piooperator->m_lRBSGenMinBackup = 11; + piooperator->m_lRBSGenMaxBackup = 15; + + // Configure rbs disk space threshold such that we are consuming extra space and backup RBS removal alone shouldn't clear up enough space. 
+ pconfig->SetCbMaxSpaceForRBS( ( piooperator->m_lRBSGenMax - piooperator->m_lRBSGenMin ) * piooperator->m_cbDirSize ); + + INST* pinst; + CHECKCALLS( JetCreateInstance2W( (JET_INSTANCE*)&pinst, NULL, NULL, JET_bitNil ) ); + CHECKCALLS( JetSetSystemParameterW( (JET_INSTANCE*)&pinst, JET_sesidNil, JET_paramEnableRBS, 1, NULL ) ); + + unique_ptr prbscleaner( new RBSCleaner( pinst, piooperator, pstate, pconfig.release() ) ); + CHECK( JET_errSuccess == prbscleaner->ErrStartCleaner() ); + + SleepTillConditionSatisfied( pstate->CPassesFinished() == 1, 2, MaxTestRunTimeInMSec ); + + CHECK( pstate->FtPassStartTime() >= ftStartTime ); + CHECK( pstate->FtPrevPassCompletionTime() >= pstate->FtPassStartTime() ); + CHECK( piooperator->m_cRemoveFolderCalls == 6 ); + CHECK( piooperator->m_lRBSGenMin == 2 ); + + CHECKCALLS( JetTerm2( (JET_INSTANCE)pinst, JET_bitTermAbrupt ) ); +} diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index b7cd5aa5..3be348da 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -3694,7 +3694,13 @@ ERR RBSCleaner::ErrRBSCleanupBackup( QWORD* cbFreeRBSDisk, QWORD* cbTotalRBSDisk return JET_errSuccess; } - if ( *cbFreeRBSDisk < cbLowDiskSpace && *cbTotalRBSDiskSpace > cbMaxRBSSpaceLowDiskSpace ) + // We will restrict max space RBS can consume on the disk to avoid overrunning the disk with snapshots. + if ( *cbTotalRBSDiskSpace > m_prbscleanerconfig->CbMaxSpaceForRBS() ) + { + fRBSCleanupBackup = fTrue; + wszRBSBackupRemoveReason = L"MaxRBSDiskSpace"; + } + else if ( *cbFreeRBSDisk < cbLowDiskSpace && *cbTotalRBSDiskSpace > cbMaxRBSSpaceLowDiskSpace ) { // Low disk space, lets clean up all the backup snapshots we have for investigation. 
fRBSCleanupBackup = fTrue; @@ -3847,13 +3853,24 @@ ERR RBSCleaner::ErrDoOneCleanupPass() wszRBSRemoveReason = L"InvalidRBS"; } - if ( fRBSCleanupMinGen || ( cbFreeRBSDisk < cbLowDiskSpace && cbTotalRBSDiskSpace > cbMaxRBSSpaceLowDiskSpace ) ) + + if ( !fRBSCleanupMinGen ) { - if ( !fRBSCleanupMinGen ) + // We will restrict max space RBS can consume on the disk to avoid overrunning the disk with snapshots. + if ( cbTotalRBSDiskSpace > m_prbscleanerconfig->CbMaxSpaceForRBS() ) { + fRBSCleanupMinGen = fTrue; + wszRBSRemoveReason = L"MaxRBSDiskSpace"; + } + else if ( cbFreeRBSDisk < cbLowDiskSpace && cbTotalRBSDiskSpace > cbMaxRBSSpaceLowDiskSpace ) + { + fRBSCleanupMinGen = fTrue; wszRBSRemoveReason = L"LowDiskSpace"; } + } + if ( fRBSCleanupMinGen ) + { Call( m_prbscleaneriooperator->ErrGetDirSize( wszRBSAbsDirPath, &cbRBSDiskSpace ) ); Call( m_prbscleaneriooperator->ErrRemoveFolder( wszRBSAbsDirPath, wszRBSRemoveReason ) ); cbTotalRBSDiskSpace -= cbRBSDiskSpace; diff --git a/dev/ese/src/inc/revertsnapshot.h b/dev/ese/src/inc/revertsnapshot.h index 8933d72f..b921b5d5 100644 --- a/dev/ese/src/inc/revertsnapshot.h +++ b/dev/ese/src/inc/revertsnapshot.h @@ -13,6 +13,7 @@ #define csecSpaceUsagePeriodicLog 3600 #define cbMaxRBSSizeAllowed 100LL*1024*1024*1024 #define cbRBSIOSize 512*1024 +#define cbMaxRBSDiskSpace 300LL*1024*1024*1024 C_ASSERT( cbRBSSegmentSizeMask == cbRBSSegmentSize - 1 ); @@ -474,6 +475,9 @@ class IRBSCleanerConfig // Max alloted space for revert snapshots when the disk space is low. virtual QWORD CbMaxSpaceForRBSWhenLowDiskSpace() = 0; + // Max alloted space for revert snapshots under normal conditions. + virtual QWORD CbMaxSpaceForRBS() = 0; + // Time since when we need revert snapshots relative to current time. 
virtual INT CSecRBSMaxTimeSpan() = 0; @@ -503,6 +507,7 @@ class RBSCleanerConfig : public IRBSCleanerConfig QWORD CbLowDiskSpaceThreshold(); QWORD CbLowDiskSpaceDisableRBSThreshold(); QWORD CbMaxSpaceForRBSWhenLowDiskSpace(); + QWORD CbMaxSpaceForRBS(); INT CSecRBSMaxTimeSpan(); INT CSecMinCleanupIntervalTime(); LONG LFirstValidRBSGen() { return 1; } @@ -532,6 +537,11 @@ INLINE QWORD RBSCleanerConfig::CbMaxSpaceForRBSWhenLowDiskSpace() return ( (QWORD) UlParam( m_pinst, JET_paramFlight_RBSMaxSpaceWhenLowDiskSpaceGb ) ) * 1024 * 1024 * 1024; } +INLINE QWORD RBSCleanerConfig::CbMaxSpaceForRBS() +{ + return cbMaxRBSDiskSpace; +} + INLINE INT RBSCleanerConfig::CSecMinCleanupIntervalTime() { return ( INT )UlParam( m_pinst, JET_paramFlight_RBSCleanupIntervalMinSec ); From 61c15af805bf591ec09c37cd02bb99f56f6d754d Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Sat, 27 Aug 2022 00:53:34 +0000 Subject: [PATCH 009/102] Improve conflict resolution logic in Leak Report. - Implemented a more aggressive retry strategy. - Added failed conflict resolution to telemetry. [Substrate:eadb3be0373aee4441571e003f6b3f524398be95] --- dev/ese/src/_res/jetmsg.mc | 1 + dev/ese/src/ese/dbutil.cxx | 83 ++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index 835eec8a..1f1cb8fd 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -2622,6 +2622,7 @@ Number of uncached primary objects: %38%n Correction applied to space owned by primary objects: %46 page(s) (%47 bytes, %48%%).%n Enumeration conflicts resolved successfully: %49%n Enumeration conflicts not resolved successfully: %50%n +Time spent resolving conflicts: %51 seconds(s).%n Performance: %39 page(s) read, %40 page(s) preread, %41 page(s) referenced, %42 page(s) dirtied, %43 page(s) re-dirtied.%n Duration: %44 minute(s) and %45 second(s).%n . 
diff --git a/dev/ese/src/ese/dbutil.cxx b/dev/ese/src/ese/dbutil.cxx index da846680..c18cd73c 100644 --- a/dev/ese/src/ese/dbutil.cxx +++ b/dev/ese/src/ese/dbutil.cxx @@ -5155,6 +5155,7 @@ LOCAL VOID DBUTLIReportSpaceLeakEstimationSucceeded( const ULONG cUncachedPrimary, const ULONG cEnumerationConflictsSucceeded, const ULONG cEnumerationConflictsFailed, + const double dblSecTotalRetry, const JET_THREADSTATS& jts, const ULONG ulMinElapsed, const double dblSecElapsed ) @@ -5193,7 +5194,8 @@ LOCAL VOID DBUTLIReportSpaceLeakEstimationSucceeded( OSFormatW( L"%u", ulMinElapsed ), OSFormatW( L"%.3f", dblSecElapsed ), OSFormatW( L"%d", cpgOwnedPrimaryCorrection ), OSFormatW( L"%I64d", pfmp->CbOfCpgSigned( cpgOwnedPrimaryCorrection ) ), ( ( cpgOwnedPrimaryOriginal != 0 ) ? OSFormatW( L"%.3f", ( 100.0 * (double)cpgOwnedPrimaryCorrection ) / (double)cpgOwnedPrimaryOriginal ) : L"-" ), OSFormatW( L"%u", cEnumerationConflictsSucceeded ), - OSFormatW( L"%u", cEnumerationConflictsFailed ) + OSFormatW( L"%u", cEnumerationConflictsFailed ), + OSFormatW( L"%.3f", dblSecTotalRetry ) }; UtilReportEvent( eventInformation, @@ -5251,6 +5253,7 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) OBJID objidLast = objidNil; PGNO pgnoFDPLast = pgnoNull; ULONG cEnumerationConflictsFailed = 0, cEnumerationConflictsSucceeded = 0; + HRT dhrtEnumerationConflictsDuration = 0; CPG cpgOwnedPrimary = 0, cpgOwnedPrimaryCorrection = 0; ULONG cCachedPrimary = 0, cUncachedPrimary = 0; CPG cpgUsedRoot = 0, cpgUsedOe = 0, cpgUsedAe = 0; @@ -5313,17 +5316,26 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) // Test injection. 
OnDebug( while ( objidLast >= (OBJID)UlConfigOverrideInjection( 48550, objidFDPOverMax ) ) ); - BOOL fRetried = fFalse, fRetry = fFalse; + double dblSecLeakReportRetryMax = 60.0; + const TICK dtickLeakReportRetrySleep = 10; + HRT hrtRetryStart = 0; const BOOL fInfiniteRetries = OnDebugOrRetail( fTrue, fFalse ); + BOOL fRetry = fFalse, fRetried = fFalse; ERR errRetry = JET_errSuccess; const CHAR* wszRetryReason = ""; do { - fRetried = fRetry; if ( fRetry ) { - UtilSleep( 10 ); fRetry = fFalse; + + if ( !fRetried ) + { + fRetried = fTrue; + hrtRetryStart = HrtHRTCount(); + } + + UtilSleep( dtickLeakReportRetrySleep ); } err = ErrFILEOpenTable( ppib, ifmp, &pfucbTable, szObjectName, JET_bitTableReadOnly | JET_bitTableTryPurgeOnClose ); @@ -5334,25 +5346,27 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) const BOOL fFoundFcb = ( FCB::PfcbFCBGet( ifmp, pgnoFDPLast, &fcbsf, fFalse /* fIncrementRefCount */, fTrue /* fInitForRecovery */ ) != pfcbNil ); const BOOL fDeletePending = fFoundFcb && ( fcbsf & fcbsfDeletePending ); - if ( fFoundFcb && !fDeletePending ) + if ( fFoundFcb ) { - // This is unexpected if we know the table is actually getting deleted. - Assert( err != JET_errObjectNotFound ); - fRetry = fTrue; - wszRetryReason = "DelNotPending"; - } - else if ( fFoundFcb && fDeletePending ) - { - // Table deletion is still pending. - fRetry = fTrue; - wszRetryReason = "DelPending"; + if ( !fDeletePending ) + { + // This is unexpected if we know the table is actually getting deleted. + Assert( err != JET_errObjectNotFound ); + fRetry = fTrue; + wszRetryReason = "DelNotPending"; + } + else + { + // Table deletion is still pending. + fRetry = fTrue; + wszRetryReason = "DelPending"; - // Perform cleanup. - (void)PverFromPpib( ppib )->ErrVERRCEClean( ifmp ); + // Perform cleanup: it may not help if an open transaction is holding us up. 
+ (void)PverFromPpib( ppib )->ErrVERRCEClean( ifmp ); + } } else { - Assert( !fFoundFcb ); if ( err == JET_errTableLocked ) { // Either the version store entry for the table deletion has cleared, @@ -5396,26 +5410,43 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) pfucbTable = pfucbNil; } - if ( fRetried ) + if ( fRetry ) { - if ( fRetry ) + if ( !fInfiniteRetries ) { - cEnumerationConflictsFailed++; - if ( !fInfiniteRetries ) + if ( fRetried ) { - FireWall( OSFormat( "LeakReportConflict:%s:%d", wszRetryReason, errRetry ) ); + // If we've previously failed, there's no point in trying hard to resolve conflicts, as the + // data will be unreliable anwyways, so reduce the retry timeout. + if ( cEnumerationConflictsFailed > 0 ) + { + dblSecLeakReportRetryMax = 0.010; + } + + if ( DblHRTSecondsElapsed( DhrtHRTElapsedFromHrtStart( hrtRetryStart ) ) >= dblSecLeakReportRetryMax ) + { + fRetry = fFalse; + dhrtEnumerationConflictsDuration += DhrtHRTElapsedFromHrtStart( hrtRetryStart ); + cEnumerationConflictsFailed++; + FireWall( OSFormat( "LeakReportConflict:%s:%d", wszRetryReason, errRetry ) ); + } } } else { - cEnumerationConflictsSucceeded++; + cEnumerationConflictsFailed++; } } + else if ( fRetried ) + { + dhrtEnumerationConflictsDuration += DhrtHRTElapsedFromHrtStart( hrtRetryStart ); + cEnumerationConflictsSucceeded++; + } Assert( pfucbTable == pfucbNil ); Assert( err >= JET_errSuccess ); } - while ( fRetry && ( !fRetried || fInfiniteRetries ) ); + while ( fRetry ); } pfmp->SetOjidLeakEstimation( objidLast ); @@ -5562,6 +5593,7 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) ppib->ResetFSessionLeakReport(); const double dblSecTotalElapsed = DblHRTSecondsElapsed( DhrtHRTElapsedFromHrtStart( hrtStart ) ); + const double dblSecTotalRetry = DblHRTSecondsElapsed( dhrtEnumerationConflictsDuration ); const ULONG ulMinElapsed = (ULONG)( dblSecTotalElapsed / 60.0 ); const double dblSecElapsed = dblSecTotalElapsed 
- (double)ulMinElapsed * 60.0; if ( err >= JET_errSuccess ) @@ -5588,6 +5620,7 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) cUncachedPrimary, cEnumerationConflictsSucceeded, cEnumerationConflictsFailed, + dblSecTotalRetry, jts, ulMinElapsed, dblSecElapsed ); From 37a3a1d6ff6f76666ec8762ed8165c6af3240f75 Mon Sep 17 00:00:00 2001 From: Michael Thorp Date: Thu, 1 Sep 2022 04:00:18 +0000 Subject: [PATCH 010/102] Sync Windows/ESENT changes to Exchange/ESE Sync Windows/ESENT changes through commit 4a0e169cb9793. [OS:4a0e169cb9793] [Substrate:241509805d387723bee5196aab52decaf41a2a69] --- CMakeLists.txt | 5 +++++ dev/ese/published/inc/os/memory.hxx | 7 ++++-- dev/ese/src/ese/revertsnapshot.cxx | 2 +- .../src/os/blockcache/_hashedlrukcache.hxx | 10 ++++----- dev/ese/src/os/memory.cxx | 4 ++-- .../collection/collectionunit/CMakeLists.txt | 1 + .../collectionunit/redblacktree.cxx | 22 ------------------- 7 files changed, 19 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 616d2a0d..8139dac7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,6 +147,11 @@ link_libraries( ${ESE_RELEASE_LIBRARIES} ) +# Global ESE project defines +add_compile_definitions( + OS_LAYER_USE_NEW_OVERRIDE +) + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # Using Microsoft Visual C++ diff --git a/dev/ese/published/inc/os/memory.hxx b/dev/ese/published/inc/os/memory.hxx index 8fd63660..6fbf4781 100644 --- a/dev/ese/published/inc/os/memory.hxx +++ b/dev/ese/published/inc/os/memory.hxx @@ -252,10 +252,12 @@ extern INT g_fMemCheck; #ifdef MEM_CHECK -INLINE const CHAR * const SzNewFile(); -INLINE ULONG UlNewLine(); +const CHAR * const SzNewFile(); +ULONG UlNewLine(); #endif +#ifdef OS_LAYER_USE_NEW_OVERRIDE + _Ret_maybenull_ _Post_writable_byte_size_(cbSize) INLINE void* __cdecl operator new( const size_t cbSize ) { @@ -290,6 +292,7 @@ inline void* __cdecl operator new[](size_t size) { return operator new(size); } inline void __cdecl 
operator delete[](void* p) { operator delete(p); } #pragma pop_macro("new") +#endif // OS_LAYER_USE_NEW_OVERRIDE // Page Memory Control diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index 3be348da..c29ee877 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -4773,7 +4773,7 @@ ERR CRBSDatabaseRevertContext::ErrRBSInitRootPageDeleteState( const LONG lRBSGen QWORD cbOffset = 0; QWORD cbRemaining = cbSize; - pbread = (BYTE*)PvOSMemoryPageAlloc( cbSize, NULL ); + pbread = (BYTE*)PvOSMemoryPageAlloc( (size_t)cbSize, NULL ); Alloc( pbread ); while ( cbRemaining > 0 ) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index b7f758cd..e236a8bc 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -3494,7 +3494,7 @@ class THashedLRUKCache const QWORD cbSlab = pc->CbChunkPerSlab(); const QWORD ibSlabMin = pc->m_pch->IbChunkHash(); const QWORD cSlab = pc->m_pch->CbChunkHash() / cbSlab; - const size_t cBitmap = roundup( cSlab, CHAR_BIT ); + const size_t cBitmap = roundup( (size_t)cSlab, CHAR_BIT ); const size_t cbBitmap = (size_t)CbFromCbit( cBitmap ); BYTE* rgbBitmap = NULL; IBitmapAPI* pbmLoaded = NULL; @@ -3515,12 +3515,12 @@ class THashedLRUKCache Alloc( rgbBitmap = new BYTE[ cbBitmap ] ); Alloc( pbmLoaded = new CFixedBitmap( rgbBitmap, cbBitmap ) ); - OnDebug( Alloc( rgCount = new LONG[ cSlab ] ) ); + OnDebug( Alloc( rgCount = new LONG[ (size_t)cSlab ] ) ); if ( rgCount ) { - memset( rgCount, 0, cSlab * sizeof( rgCount[ 0 ] ) ); + memset( rgCount, 0, (size_t)cSlab * sizeof( rgCount[ 0 ] ) ); } - Alloc( rgBucket = new CBucket[ cBucket ] ); + Alloc( rgBucket = new CBucket[ (size_t)cBucket ] ); Alloc( rgdwStandby = new DWORD[ cStandby ] ); memset( rgdwStandby, 0, cStandby * sizeof( rgdwStandby[ 0 ] ) ); Alloc( pcbpf = new CCachedBlockPresenceFilter( pc, @@ -3702,7 +3702,7 @@ 
class THashedLRUKCache m_rgCount( *prgCount ), m_cCluster( cCluster ), m_cBucket( cBucket ), - m_maskBucket( ( 1LL << Log2( cBucket ) ) - 1 ), + m_maskBucket( ( 1LL << Log2( (ULONG)cBucket ) ) - 1 ), m_rgBucket( *prgBucket ), m_cCuckooMax( max( 2, cCuckooMax ) ), m_cStandby( cStandby ), diff --git a/dev/ese/src/os/memory.cxx b/dev/ese/src/os/memory.cxx index 662f21be..24a2af4d 100644 --- a/dev/ese/src/os/memory.cxx +++ b/dev/ese/src/os/memory.cxx @@ -1837,11 +1837,11 @@ ERR ErrOSMemoryInit() #ifdef MEM_CHECK -INLINE const CHAR * const SzNewFile() +const CHAR * const SzNewFile() { return Postls()->szNewFile; } -INLINE ULONG UlNewLine() +ULONG UlNewLine() { return Postls()->ulNewLine; } diff --git a/test/ese/src/devlibtest/collection/collectionunit/CMakeLists.txt b/test/ese/src/devlibtest/collection/collectionunit/CMakeLists.txt index f6ab747a..eb4dfc7d 100644 --- a/test/ese/src/devlibtest/collection/collectionunit/CMakeLists.txt +++ b/test/ese/src/devlibtest/collection/collectionunit/CMakeLists.txt @@ -19,6 +19,7 @@ target_compile_definitions(COLLECTIONUNIT PRIVATE _MBCS BUILD_ENV_IS_EX ESE_FLAVOUR_IS_ESE + _HAS_AUTO_PTR_ETC ) target_include_directories(COLLECTIONUNIT PRIVATE diff --git a/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx b/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx index 7f34e156..d0fb128e 100644 --- a/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx +++ b/test/ese/src/devlibtest/collection/collectionunit/redblacktree.cxx @@ -5,28 +5,6 @@ using namespace std; #include "collectionunittest.hxx" -// Borrowed from an implementation of random_shuffle(), used here -// to make shuffle() behave like random_shuffle. 
-struct _Rand_urng_from_func -{ // wrap rand() as a URNG - typedef unsigned int result_type; - - static result_type (min)() - { // return minimum possible generated value - return (0); - } - - static result_type (max)() - { // return maximum possible generated value - return (RAND_MAX); - } - - result_type operator()() - { // invoke rand() - return (_CSTD rand()); - } -}; - _Rand_urng_from_func _ShuffleRandFunc; // node constructor zeroes members and sets the color to red From d06df73e64687e195ff96c2c4bdd24f0f3bb06fe Mon Sep 17 00:00:00 2001 From: Nathanael Cheriere Date: Thu, 1 Sep 2022 08:01:20 +0000 Subject: [PATCH 011/102] Adding subsampling of cache traces in ESE Added the possibility to trace a subset of pages in cache according to the method presented in Waldspurger, C. A., Park, N., Garthwaite, A., & Ahmad, I. (2015). Efficient MRC construction with SHARDS. In FAST 15. This change makes it possible to collect enough data to be able to simulate and study cache performance (for example with different parameters such as the cache size) with a much lower overhead since only a few pages are traced. The subsampling is activated only if the keyword 'BFRESMGR' is set when collecting the trace, and the ratio of sampled pages can be set with the parameter 'JET_paramFlight_CacheTraceSamplingRatio'.
[Substrate:217dee6e6b8e0d3732d4b7a1a661f01928cc6353] --- dev/ese/published/inc/jethdr.w | 3 +- dev/ese/published/inc/os/oseventtrace.g.hxx | 23 ++ dev/ese/published/inc/os/oseventtrace.hxx | 6 + dev/ese/src/_etw/Microsoft-ETW-ESE.mc | 2 + dev/ese/src/_etw/gengenetw.pl | 84 +++++- dev/ese/src/ese/bf.cxx | 313 +++++++++++--------- dev/ese/src/ese/jetapi.cxx | 18 +- dev/ese/src/ese/sysparamtable.g.cxx | 4 +- dev/ese/src/inc/_bf.hxx | 2 + dev/ese/src/noncore/interop/params.h | 1 + dev/ese/src/os/oseventtrace.cxx | 16 + 11 files changed, 325 insertions(+), 147 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index de5806c6..66297b67 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -4167,8 +4167,7 @@ typedef enum #define JET_paramEnableShrinkDatabase 184 // Release space back to the OS when deleting data. This may require an OS feature of Sparse Files, and is subject to change. // end_PubEsent -// DEPRECATED: this was once used in the first implementation of DB shrink. -// #define JET_paramAutomaticShrinkDatabaseFreeSpaceThreshold 185 // DEPRECATED: Minimum threshold (percentage of the database size) that determines if the periodic shrink and/or shrink at JetTerm will take place or not. +#define JET_paramFlight_CacheTraceSamplingRatio 185 // Trace all cache events for 1 out of JET_paramFlight_CacheTraceSamplingRatio pages. 
// begin_PubEsent diff --git a/dev/ese/published/inc/os/oseventtrace.g.hxx b/dev/ese/published/inc/os/oseventtrace.g.hxx index 5c7cf772..f365af33 100644 --- a/dev/ese/published/inc/os/oseventtrace.g.hxx +++ b/dev/ese/published/inc/os/oseventtrace.g.hxx @@ -90,3 +90,26 @@ enum OSEventTraceGUID etguidOsTraceBase // general tags autogen'd before this one }; +enum OSEventTraceKeywordGUID : ULONGLONG +{ + _etguidKeywordError = 0x0000000000000001, + _etguidKeywordPerformance = 0x0000000000000002, + _etguidKeywordTrace = 0x0000000000000004, + _etguidKeywordTransaction = 0x0000000000000008, + _etguidKeywordSpace = 0x0000000000000010, + _etguidKeywordBF = 0x0000000000000020, + _etguidKeywordIO = 0x0000000000000040, + _etguidKeywordLOG = 0x0000000000000080, + _etguidKeywordTask = 0x0000000000000100, + _etguidKeywordTest = 0x0000000000000200, + _etguidKeywordBFRESMGR = 0x0000000000000400, + _etguidKeywordStationId = 0x0000000000000800, + _etguidKeywordJETTraceTag = 0x0000000000001000, + _etguidKeywordStallLatencies = 0x0000000000002000, + _etguidKeywordDataWorkingSet = 0x0000000000004000, + _etguidKeywordIOEX = 0x0000000000008000, + _etguidKeywordIOSESS = 0x0000000000010000, + _etguidKeywordSubstrateTelemetry = 0x0000000000020000, + _etguidKeywordCompressExp = 0x0000000100000000, +}; + diff --git a/dev/ese/published/inc/os/oseventtrace.hxx b/dev/ese/published/inc/os/oseventtrace.hxx index 89d37af3..6d4bf27c 100644 --- a/dev/ese/published/inc/os/oseventtrace.hxx +++ b/dev/ese/published/inc/os/oseventtrace.hxx @@ -23,6 +23,9 @@ INLINE BOOL FOSEventTraceEnabled(); #define OSEventTrace if ( FOSEventTraceEnabled() ) OSEventTrace_ +template< OSEventTraceKeywordGUID etguid > +INLINE BOOL FOSEventTraceKeywordEnabled(); + // The first 8 are generic reasons, resused per event, the next 248 are for whatever enum TraceStationIdentificationReason : BYTE // tsidr @@ -137,5 +140,8 @@ template INLINE BOOL COSEventTraceIdCheck::FAnnounceTime< _etguidSysStationId >( template INLINE BOOL 
COSEventTraceIdCheck::FAnnounceTime< _etguidIsamDbfilehdrInfo >( const TraceStationIdentificationReason tsidr ); template INLINE BOOL COSEventTraceIdCheck::FAnnounceTime< _etguidFmpStationId >( const TraceStationIdentificationReason tsidr ); +// Used to avoid subsampling if the keyword BFRESMGR is not set +template INLINE BOOL FOSEventTraceKeywordEnabled< _etguidKeywordBFRESMGR >(); + #endif // _OS_EVENT_TRACE_HXX_INCLUDED diff --git a/dev/ese/src/_etw/Microsoft-ETW-ESE.mc b/dev/ese/src/_etw/Microsoft-ETW-ESE.mc index 1795c94a..4d900ba4 100644 --- a/dev/ese/src/_etw/Microsoft-ETW-ESE.mc +++ b/dev/ese/src/_etw/Microsoft-ETW-ESE.mc @@ -107,6 +107,7 @@ ESE_PRE_GEN_BASE_FILE: be pre-processed with eseetw.pl. http://msdn.microsoft.com/en-us/library/aa382786(VS.85).aspx --> + + / ){ + # Line is passed through later + $fInKeywordList = 1; + } if ( $szMcLine =~ // ){ print hEtwNewMc $szMcLine; $fInTaskList = 1; @@ -466,7 +476,51 @@ sub szpad { $fInStringList = 1; } - if ( $fInTaskList ){ + if ( $fInKeywordList ){ + + if ( $szMcLine =~ /{Name} = -1; + $keyword->{Mask} = -1; + $fCurrParsingKeyword = 1; + } + + elsif ( $szMcLine =~ /\/>/ ){ + # Keywords should have a mask and a name + if ( !$fCurrParsingKeyword ){ + die "ERROR: Cannot complete the parsing of a keyword without starting it.\n"; + } + if ( $keyword->{Name} == -1 ){ + die "ERROR: Parsed a keyword without a name in $szEtwBaseMc.\n"; + } + if ( $keyword->{Mask} == -1 ){ + die "ERROR: Keyword $keyword->{Name} does not have a mask in $szEtwBaseMc.\n"; + } + push @rgsKeywords, $keyword; + $fCurrParsingKeyword = 0; + } + + elsif ( $szMcLine =~ /mask="0x[0-9]*"\s*$/ ){ + @MaskParsing = split /\s*"\s*/, $szMcLine; + $keyword->{Mask} = @MaskParsing[1]; + } + + elsif ( $szMcLine =~ /name=".*"\s*$/ ){ + @NameParsing = split /\s*"\s*/, $szMcLine; + $keyword->{Name} = @NameParsing[1]; + } + + if ( $szMcLine =~ // ){ + $fInKeywordList = 0; + } + + # pass the $line through unaltered for printing + print hEtwNewMc $szMcLine; + + 
} elsif ( $fInTaskList ){ if ( !$fCurrInsertDone ){ @@ -617,6 +671,12 @@ sub szpad { } +if ( $iPrintLevel >= 2 ){ + print "Keywords found in $szEtwBaseMc \n"; + for $i ( 0 ... $#rgsKeywords ){ + print "\t\tEtwKeyword[$i] = { $rgsKeywords[$i]{Name}, $rgsKeywords[$i]{Mask} }\n"; + } +} # ----------------------------------------------------------------------------------------------------------------------------------------- # @@ -652,11 +712,23 @@ sub szpad { } -print hOsEventTraceHxxHdrIns <<__OSEVENTTRACEHXXEPILOG__; +print hOsEventTraceHxxHdrIns <<__OSEVENTTRACEHXXMIDDLE__; etguidOsTraceBase // general tags autogen'd before this one }; +enum OSEventTraceKeywordGUID : ULONGLONG +{ +__OSEVENTTRACEHXXMIDDLE__ + +for $i ( 0 .. $#rgsKeywords ){ + # example: _etguidKeywordBfResMgr = 0x00000400, + print hOsEventTraceHxxHdrIns " _etguidKeyword$rgsKeywords[$i]{Name} = $rgsKeywords[$i]{Mask},\n"; +} + +print hOsEventTraceHxxHdrIns <<__OSEVENTTRACEHXXEPILOG__; +}; + __OSEVENTTRACEHXXEPILOG__ diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index ee9d3329..8560f44b 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -6274,6 +6274,24 @@ void BFIFTLTerm() #define ENABLE_BFFTL_TRACING #endif +ULONG g_ulSamplingRatio = 0; +ULONG g_ulSamplingSeed = 0; + +void BFICacheTraceSamplingInit( const ULONG ulSamplingRatio ) +{ + g_ulSamplingRatio = ulSamplingRatio; + g_ulSamplingSeed = (ULONG)TickOSTimeCurrent(); +} + +INLINE bool FBFISamplePage( const IFMP ifmp, const PGNO pgno ) +{ + if ( ( !FOSEventTraceKeywordEnabled< _etguidKeywordBFRESMGR >() ) || ( g_ulSamplingRatio <= 1 ) ) + { + return true; + } + return ( ( ( IFMPPGNO( ifmp, pgno ).Hash() + g_ulSamplingSeed ) % g_ulSamplingRatio ) == 0 ); +} + INLINE void BFITraceResMgrInit( const INT K, const double csecCorrelatedTouch, @@ -6323,21 +6341,24 @@ INLINE void BFITraceCachePage( const BFRequestTraceFlags bfrtf, const TraceContext& tc ) { - GetCurrUserTraceContext getutc; - const BYTE bClientType = 
getutc->context.nClientType; + if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + { + GetCurrUserTraceContext getutc; + const BYTE bClientType = getutc->context.nClientType; #ifdef ENABLE_BFFTL_TRACING #endif // ENABLE_BFFTL_TRACING - ETCacheCachePage( - tickCache, - pbf->ifmp, - pbf->pgno, - bflf, - bflt, - pctPriority, - bfrtf, - bClientType ); + ETCacheCachePage( + tickCache, + pbf->ifmp, + pbf->pgno, + bflf, + bflt, + pctPriority, + bfrtf, + bClientType ); + } } INLINE void BFITraceRequestPage( @@ -6349,43 +6370,47 @@ INLINE void BFITraceRequestPage( const BFRequestTraceFlags bfrtf, const TraceContext& tc ) { + if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + { #ifdef ENABLE_BFFTL_TRACING - GetCurrUserTraceContext getutc; - const BYTE bClientType = getutc->context.nClientType; + GetCurrUserTraceContext getutc; + const BYTE bClientType = getutc->context.nClientType; - (void)ErrBFIFTLTouch( - tickTouch, - pbf->ifmp, - pbf->pgno, - bflt, - bClientType, - pctPriority, - !!( bfrtf & bfrtfUseHistory ), - !!( bfrtf & bfrtfNewPage ), - !!( bfrtf & bfrtfNoTouch ), - !!( bfrtf & bfrtfDBScan ) ); + (void) ErrBFIFTLTouch( + tickTouch, + pbf->ifmp, + pbf->pgno, + bflt, + bClientType, + pctPriority, + !!( bfrtf & bfrtfUseHistory ), + !!( bfrtf & bfrtfNewPage ), + !!( bfrtf & bfrtfNoTouch ), + !!( bfrtf & bfrtfDBScan ) ); #endif // ENABLE_BFFTL_TRACING - if ( FOSEventTraceEnabled< _etguidCacheRequestPage >() ) - { + if ( FOSEventTraceEnabled< _etguidCacheRequestPage >() ) + { #ifndef ENABLE_BFFTL_TRACING - GetCurrUserTraceContext getutc; - const BYTE bClientType = getutc->context.nClientType; + GetCurrUserTraceContext getutc; + const BYTE bClientType = getutc->context.nClientType; #endif - OSEventTrace_( - _etguidCacheRequestPage, - 10, - &tickTouch, - &(pbf->ifmp), - &(pbf->pgno), - &bflf, - &( ( (CPAGE::PGHDR *)( pbf->pv ) )->objidFDP ), - &( ( (CPAGE::PGHDR *)( pbf->pv ) )->fFlags ), - &bflt, - &pctPriority, - &bfrtf, - &bClientType ); + + OSEventTrace_( + 
_etguidCacheRequestPage, + 10, + &tickTouch, + &( pbf->ifmp ), + &( pbf->pgno ), + &bflf, + &( ( (CPAGE::PGHDR*) ( pbf->pv ) )->objidFDP ), + &( ( (CPAGE::PGHDR*) ( pbf->pv ) )->fFlags ), + &bflt, + &pctPriority, + &bfrtf, + &bClientType ); + } } } @@ -6393,11 +6418,14 @@ INLINE void BFITraceMarkPageAsSuperCold( const IFMP ifmp, const PGNO pgno ) { + if ( FBFISamplePage( ifmp, pgno ) ) + { #ifdef ENABLE_BFFTL_TRACING - (void)ErrBFIFTLMarkAsSuperCold( ifmp, pgno ); + ( void )ErrBFIFTLMarkAsSuperCold( ifmp, pgno ); #endif // ENABLE_BFFTL_TRACING - ETMarkPageAsSuperCold( TickOSTimeCurrent(), ifmp, pgno ); + ETMarkPageAsSuperCold( TickOSTimeCurrent(), ifmp, pgno ); + } } INLINE void BFITraceEvictPage( @@ -6407,15 +6435,18 @@ INLINE void BFITraceEvictPage( const ERR errBF, const ULONG bfef ) { - const ULONG pctPriority = 0; // Not relevant for eviction anymore. - + if ( FBFISamplePage( ifmp, pgno ) ) + { + const ULONG pctPriority = 0; // Not relevant for eviction anymore. + #ifdef ENABLE_BFFTL_TRACING - (void)ErrBFIFTLEvict( ifmp, pgno, fCurrentVersion, errBF, bfef, pctPriority ); + ( void )ErrBFIFTLEvict( ifmp, pgno, fCurrentVersion, errBF, bfef, pctPriority ); #endif // ENABLE_BFFTL_TRACING - const TICK tickEvictPage = TickOSTimeCurrent(); + const TICK tickEvictPage = TickOSTimeCurrent(); - ETCacheEvictPage( tickEvictPage, ifmp, pgno, fCurrentVersion, errBF, bfef, pctPriority ); + ETCacheEvictPage( tickEvictPage, ifmp, pgno, fCurrentVersion, errBF, bfef, pctPriority ); + } } INLINE void BFITraceDirtyPage( @@ -6423,50 +6454,53 @@ INLINE void BFITraceDirtyPage( const BFDirtyFlags bfdf, const TraceContext& tc ) { - auto tick = TickOSTimeCurrent(); - static_assert( sizeof(tick) == sizeof(DWORD), "Compiler magic failing." ); - // Note that pbf->lgposModify contains the current lgposModify of the buffer, prior - // to it being updated to reflect the new lgposModify that is triggering the dirty - // operation. 
Each setting of lgposModify will generate its own trace so that is - // more suitable to determine the lgpos associated with the dirty operation. + if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + { + auto tick = TickOSTimeCurrent(); + static_assert( sizeof( tick ) == sizeof( DWORD ), "Compiler magic failing." ); + + // Note that pbf->lgposModify contains the current lgposModify of the buffer, prior + // to it being updated to reflect the new lgposModify that is triggering the dirty + // operation. Each setting of lgposModify will generate its own trace so that is + // more suitable to determine the lgpos associated with the dirty operation. - // Need to read atomically because removing undo info may change it from under us - // without a latch. + // Need to read atomically because removing undo info may change it from under us + // without a latch. - const LGPOS lgposModifyRead = pbf->lgposModify.LgposAtomicRead(); - const ULONG lgposModifyLGen = (ULONG)lgposModifyRead.lGeneration; - const USHORT lgposModifyISec = lgposModifyRead.isec; - const USHORT lgposModifyIb = lgposModifyRead.ib; + const LGPOS lgposModifyRead = pbf->lgposModify.LgposAtomicRead(); + const ULONG lgposModifyLGen = (ULONG) lgposModifyRead.lGeneration; + const USHORT lgposModifyISec = lgposModifyRead.isec; + const USHORT lgposModifyIb = lgposModifyRead.ib; - Assert( (LONG)lgposModifyLGen == lgposModifyRead.lGeneration ); + Assert( (LONG) lgposModifyLGen == lgposModifyRead.lGeneration ); #ifdef ENABLE_BFFTL_TRACING - (void)ErrBFIFTLDirty( pbf->ifmp, pbf->pgno, bfdf, lgposModifyLGen, lgposModifyISec, lgposModifyIb ); + ( void )ErrBFIFTLDirty( pbf->ifmp, pbf->pgno, bfdf, lgposModifyLGen, lgposModifyISec, lgposModifyIb ); #endif // ENABLE_BFFTL_TRACING - Assert( CmpLgpos( pbf->lgposModify.LgposAtomicRead(), lgposModifyRead ) >= 0 ); + Assert( CmpLgpos( pbf->lgposModify.LgposAtomicRead(), lgposModifyRead ) >= 0 ); - const CPAGE::PGHDR * ppghdr = (const CPAGE::PGHDR *)pbf->pv; - GetCurrUserTraceContext 
getutc; + const CPAGE::PGHDR* ppghdr = (const CPAGE::PGHDR*) pbf->pv; + GetCurrUserTraceContext getutc; - // Iorp() is reserved for the loweset level action that caused an IO, just above the IO layer (e.g. BF's reason for initiating an IO). - // Dirtying a page isn't going to cause an IO directly, so iorp should be none. But it doesn't hurt telemetry if we do emit an iorp here. - // These are some of the culprits who push an iorp because they call the IO layer directly, which expects an iorp. - // But they also end up leaking iorp into the BF Api. - // FUTURE-2022-04-14-SOMEONE - If we ever save tc on the BF, consider fixing the iorp leak. - Expected( tc.iorReason.Iorp() == iorpNone || - tc.iorReason.Iorp() == iorpDatabaseShrink || - tc.iorReason.Iorp() == iorpDatabaseTrim || - tc.iorReason.Iorp() == iorpPatchFix || - tc.iorReason.Iorp() == iorpSPDatabaseInlineZero || - tc.iorReason.Iorp() == iorpBFLatch ); // page patch - - if ( pbf->bfdf < bfdfDirty /* first "proper" dirty */ ) - { - // There is no point in logging itagMicFree, cbfree, dbtime because they would be the - // same as the most recent read page trace at this point. - ETCacheFirstDirtyPage( + // Iorp() is reserved for the loweset level action that caused an IO, just above the IO layer (e.g. BF's reason for initiating an IO). + // Dirtying a page isn't going to cause an IO directly, so iorp should be none. But it doesn't hurt telemetry if we do emit an iorp here. + // These are some of the culprits who push an iorp because they call the IO layer directly, which expects an iorp. + // But they also end up leaking iorp into the BF Api. + // FUTURE-2022-04-14-SOMEONE - If we ever save tc on the BF, consider fixing the iorp leak. 
+ Expected( tc.iorReason.Iorp() == iorpNone || + tc.iorReason.Iorp() == iorpDatabaseShrink || + tc.iorReason.Iorp() == iorpDatabaseTrim || + tc.iorReason.Iorp() == iorpPatchFix || + tc.iorReason.Iorp() == iorpSPDatabaseInlineZero || + tc.iorReason.Iorp() == iorpBFLatch ); // page patch + + if ( pbf->bfdf < bfdfDirty /* first "proper" dirty */ ) + { + // There is no point in logging itagMicFree, cbfree, dbtime because they would be the + // same as the most recent read page trace at this point. + ETCacheFirstDirtyPage( tick, pbf->ifmp, pbf->pgno, @@ -6486,9 +6520,9 @@ INLINE void BFITraceDirtyPage( tc.iorReason.Ioru(), tc.iorReason.Iorf(), tc.nParentObjectClass ); - } + } - ETCacheDirtyPage( + ETCacheDirtyPage( tick, pbf->ifmp, pbf->pgno, @@ -6508,85 +6542,92 @@ INLINE void BFITraceDirtyPage( tc.iorReason.Ioru(), tc.iorReason.Iorf(), tc.nParentObjectClass ); + } } INLINE void BFITraceSetLgposModify( const PBF pbf, const LGPOS& lgposModify ) { - auto tick = TickOSTimeCurrent(); - static_assert( sizeof(tick) == sizeof(DWORD), "Compiler magic failing." ); + if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + { + auto tick = TickOSTimeCurrent(); + static_assert( sizeof( tick ) == sizeof( DWORD ), "Compiler magic failing." 
); #ifdef ENABLE_BFFTL_TRACING - const ULONG lgposModifyLGen = (ULONG)lgposModify.lGeneration; - const USHORT lgposModifyISec = lgposModify.isec; - const USHORT lgposModifyIb = lgposModify.ib; + const ULONG lgposModifyLGen = (ULONG) lgposModify.lGeneration; + const USHORT lgposModifyISec = lgposModify.isec; + const USHORT lgposModifyIb = lgposModify.ib; - Assert( (LONG)lgposModifyLGen == lgposModify.lGeneration ); + Assert( (LONG) lgposModifyLGen == lgposModify.lGeneration ); - (void)ErrBFIFTLSetLgposModify( pbf->ifmp, pbf->pgno, lgposModifyLGen, lgposModifyISec, lgposModifyIb ); + (void) ErrBFIFTLSetLgposModify( pbf->ifmp, pbf->pgno, lgposModifyLGen, lgposModifyISec, lgposModifyIb ); #endif // ENABLE_BFFTL_TRACING - ETCacheSetLgposModify( + ETCacheSetLgposModify( tick, pbf->ifmp, pbf->pgno, lgposModify.qw ); + } } INLINE void BFITraceWritePage( const PBF pbf, const FullTraceContext& tc ) { - const ULONG bfdfTrace = (ULONG)pbf->bfdf; // We need to put this on the stack because & isn't valid on a bitfield - auto tick = TickOSTimeCurrent(); + if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + { + const ULONG bfdfTrace = (ULONG) pbf->bfdf; // We need to put this on the stack because & isn't valid on a bitfield + auto tick = TickOSTimeCurrent(); - Assert( tc.etc.iorReason.Iorp() != iorpNone ); + Assert( tc.etc.iorReason.Iorp() != iorpNone ); #ifdef ENABLE_BFFTL_TRACING - // Update: Now that we're FTL logging from the IO completion, it can cause IO issue - // and sync complete below the existing completion: - // ese!OSSYNC::CLockDeadlockDetectionInfo::AssertCleanApiExit+0xd4 [d:\src\e16\esemulti\sources\dev\ese\published\inc\sync.hxx @ 3408] - // ese!OSDiskIIOThreadCompleteWithErr+0x8a6 [d:\src\e16\esemulti\sources\dev\ese\src\os\osdisk.cxx @ 6984] - // ese!COSFile::ErrIOAsync+0x6ef [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1811] - // ese!COSFile::ErrIOWrite+0x2c2 [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1111] - // 
ese!CFastTraceLog::ErrFTLIFlushBuffer+0x9fb [d:\src\e16\esemulti\sources\dev\ese\src\os\trace.cxx @ 2461] - // ese!CFastTraceLog::ErrFTLFlushBuffer+0x3d [d:\src\e16\esemulti\sources\dev\ese\src\os\trace.cxx @ 2494] - // ese!CFastTraceLogBuffer::ErrFTLBTrace+0x2b6 [d:\src\e16\esemulti\distrib\private\inc\trace.hxx @ 598] - // ese!CFastTraceLog::ErrFTLTrace+0x90 [d:\src\e16\esemulti\sources\dev\ese\src\os\trace.cxx @ 2524] - // ese!ErrBFIFTLWrite+0xc7 [d:\src\e16\esemulti\sources\dev\ese\published\inc\bf\bfftl.hxx @ 337] - // ese!BFITraceWritePage+0x111 [d:\src\e16\esemulti\sources\dev\ese\src\ese\bf.cxx @ 6035] - // ese!BFIAsyncWriteComplete+0xc1 [d:\src\e16\esemulti\sources\dev\ese\src\ese\bf.cxx @ 25254] - // ese!COSFile::IOComplete+0xe5 [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1592] - // ese!COSFile::IOComplete_+0x26 [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1565] - // ese!OSDiskIIOThreadCompleteWithErr+0x907 [d:\src\e16\esemulti\sources\dev\ese\src\os\osdisk.cxx @ 6999] - // ese!OSDiskIIOThreadIComplete+0x150 [d:\src\e16\esemulti\sources\dev\ese\src\os\osdisk.cxx @ 7047] - // ese!CTaskManager::TMIDispatch+0x800 [d:\src\e16\esemulti\sources\dev\ese\src\os\task.cxx @ 766] - // Ultimately this could be fixed by [re]moving FTL tracing off the existing IO mechanism and using - // it's own NT API writing calls, which would also fix the other issue in ErrBFIPrereadPage() at the - // same time. 
- //(void)ErrBFIFTLWrite( pbf->ifmp, pbf->pgno, BFDirtyFlags( pbf->bfdf ), iorp ); + // Update: Now that we're FTL logging from the IO completion, it can cause IO issue + // and sync complete below the existing completion: + // ese!OSSYNC::CLockDeadlockDetectionInfo::AssertCleanApiExit+0xd4 [d:\src\e16\esemulti\sources\dev\ese\published\inc\sync.hxx @ 3408] + // ese!OSDiskIIOThreadCompleteWithErr+0x8a6 [d:\src\e16\esemulti\sources\dev\ese\src\os\osdisk.cxx @ 6984] + // ese!COSFile::ErrIOAsync+0x6ef [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1811] + // ese!COSFile::ErrIOWrite+0x2c2 [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1111] + // ese!CFastTraceLog::ErrFTLIFlushBuffer+0x9fb [d:\src\e16\esemulti\sources\dev\ese\src\os\trace.cxx @ 2461] + // ese!CFastTraceLog::ErrFTLFlushBuffer+0x3d [d:\src\e16\esemulti\sources\dev\ese\src\os\trace.cxx @ 2494] + // ese!CFastTraceLogBuffer::ErrFTLBTrace+0x2b6 [d:\src\e16\esemulti\distrib\private\inc\trace.hxx @ 598] + // ese!CFastTraceLog::ErrFTLTrace+0x90 [d:\src\e16\esemulti\sources\dev\ese\src\os\trace.cxx @ 2524] + // ese!ErrBFIFTLWrite+0xc7 [d:\src\e16\esemulti\sources\dev\ese\published\inc\bf\bfftl.hxx @ 337] + // ese!BFITraceWritePage+0x111 [d:\src\e16\esemulti\sources\dev\ese\src\ese\bf.cxx @ 6035] + // ese!BFIAsyncWriteComplete+0xc1 [d:\src\e16\esemulti\sources\dev\ese\src\ese\bf.cxx @ 25254] + // ese!COSFile::IOComplete+0xe5 [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1592] + // ese!COSFile::IOComplete_+0x26 [d:\src\e16\esemulti\sources\dev\ese\src\os\osfile.cxx @ 1565] + // ese!OSDiskIIOThreadCompleteWithErr+0x907 [d:\src\e16\esemulti\sources\dev\ese\src\os\osdisk.cxx @ 6999] + // ese!OSDiskIIOThreadIComplete+0x150 [d:\src\e16\esemulti\sources\dev\ese\src\os\osdisk.cxx @ 7047] + // ese!CTaskManager::TMIDispatch+0x800 [d:\src\e16\esemulti\sources\dev\ese\src\os\task.cxx @ 766] + // Ultimately this could be fixed by [re]moving FTL tracing off the existing IO mechanism and using + // 
it's own NT API writing calls, which would also fix the other issue in ErrBFIPrereadPage() at the + // same time. + //(void)ErrBFIFTLWrite( pbf->ifmp, pbf->pgno, BFDirtyFlags( pbf->bfdf ), iorp ); #endif // ENABLE_BFFTL_TRACING - ETCacheWritePage( - tick, - pbf->ifmp, - pbf->pgno, - (((CPAGE::PGHDR *)(pbf->pv))->objidFDP), - (((CPAGE::PGHDR *)(pbf->pv))->fFlags), - bfdfTrace, - tc.utc.context.dwUserID, - tc.utc.context.nOperationID, - tc.utc.context.nOperationType, - tc.utc.context.nClientType, - tc.utc.context.fFlags, - tc.utc.dwCorrelationID, - tc.etc.iorReason.Iorp(), - tc.etc.iorReason.Iors(), - tc.etc.iorReason.Iort(), - tc.etc.iorReason.Ioru(), - tc.etc.iorReason.Iorf(), - tc.etc.nParentObjectClass ); + ETCacheWritePage( + tick, + pbf->ifmp, + pbf->pgno, + ( ( (CPAGE::PGHDR*) ( pbf->pv ) )->objidFDP ), + ( ( (CPAGE::PGHDR*) ( pbf->pv ) )->fFlags ), + bfdfTrace, + tc.utc.context.dwUserID, + tc.utc.context.nOperationID, + tc.utc.context.nOperationType, + tc.utc.context.nClientType, + tc.utc.context.fFlags, + tc.utc.dwCorrelationID, + tc.etc.iorReason.Iorp(), + tc.etc.iorReason.Iors(), + tc.etc.iorReason.Iort(), + tc.etc.iorReason.Ioru(), + tc.etc.iorReason.Iorf(), + tc.etc.nParentObjectClass ); + } } diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 1f694b4c..742d450f 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -6217,7 +6217,7 @@ SetCacheSizeRange( CJetParam* const pjetparam, PCWSTR wszParam ) { ERR err = JET_errSuccess; - + Call( CJetParam::SetInteger( pjetparam, pinst, ppib, ulParam, wszParam ) ); Call( ErrBFConsumeSettings( bfcsCacheSize, ifmpNil ) ); @@ -6225,6 +6225,22 @@ SetCacheSizeRange( CJetParam* const pjetparam, return err; } +ERR +SetCacheTraceSamplingRatio( CJetParam* const pjetparam, + INST* const pinst, + PIB* const ppib, + const ULONG_PTR ulParam, + PCWSTR wszParam ) +{ + ERR err = JET_errSuccess; + + Call( CJetParam::SetInteger( pjetparam, pinst, ppib, ulParam, wszParam ) ); + 
BFICacheTraceSamplingInit( (ULONG)ulParam ); + +HandleError: + return err; +} + ERR SetCheckpointDepthMax( CJetParam* const pjetparam, INST* const pinst, diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index 7a1eed6d..e661d2bc 100644 --- a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -198,7 +198,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramHungIOActions, CJetParam::typeInteger, 1, 1, 1, 1, JET_bitNil, (JET_bitHungIOEvent|JET_bitHungIOCancel|JET_bitHungIODebug|JET_bitHungIOEnforce|JET_bitHungIOTimeout), JET_bitHungIOEvent), NORMAL_PARAM(JET_paramMinDataForXpress, CJetParam::typeInteger, 1, 1, 1, 1, 0, 2147483647, 1024), CUSTOM_PARAM3(JET_paramEnableShrinkDatabase, CJetParam::typeGrbit, 0, 0, 0, 1, 0, 0xffff, JET_paramEnableShrinkDatabase_DEFAULT, JET_paramEnableShrinkDatabase_DEFAULT, CJetParam::GetInteger, SetShrinkDatabaseParam, CJetParam::CloneDefault), - ILLEGAL_PARAM(185), + CUSTOM_PARAM3(JET_paramFlight_CacheTraceSamplingRatio, CJetParam::typeInteger, 0, 1, 0, 0, 1, 2147483647, 1, 1, CJetParam::GetInteger, SetCacheTraceSamplingRatio, CJetParam::CloneDefault), NORMAL_PARAM(JET_paramProcessFriendlyName, CJetParam::typeString, 0, 1, 1, 1, 0, JET_cbNameMost, L""), NORMAL_PARAM(JET_paramDurableCommitCallback, CJetParam::typePointer, 1, 0, 0, 1, 0, -1, NULL), IGNORED_PARAM(JET_paramEnableSqm, CJetParam::typeInteger, 0, 0, 0, 1, JET_sqmDisable, JET_sqmFromCEIP, JET_sqmEnable), @@ -431,7 +431,7 @@ static_assert( JET_paramHungIOThreshold == 181, "The order of defintion for JET_ static_assert( JET_paramHungIOActions == 182, "The order of defintion for JET_paramHungIOActions in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramMinDataForXpress == 183, "The order of defintion for JET_paramMinDataForXpress in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); static_assert( JET_paramEnableShrinkDatabase == 184, "The order of defintion for JET_paramEnableShrinkDatabase in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( 185 == 185, "The order of defintion for 185 in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramFlight_CacheTraceSamplingRatio == 185, "The order of defintion for JET_paramFlight_CacheTraceSamplingRatio in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramProcessFriendlyName == 186, "The order of defintion for JET_paramProcessFriendlyName in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramDurableCommitCallback == 187, "The order of defintion for JET_paramDurableCommitCallback in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramEnableSqm == 188, "The order of defintion for JET_paramEnableSqm in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); diff --git a/dev/ese/src/inc/_bf.hxx b/dev/ese/src/inc/_bf.hxx index 5678969d..22323fc4 100644 --- a/dev/ese/src/inc/_bf.hxx +++ b/dev/ese/src/inc/_bf.hxx @@ -601,6 +601,8 @@ void BFIFTLTerm(); // BF tracing +void BFICacheTraceSamplingInit( const ULONG ulSamplingRatio ); + INLINE void BFITraceResMgrInit( const INT K, const double csecCorrelatedTouch, diff --git a/dev/ese/src/noncore/interop/params.h b/dev/ese/src/noncore/interop/params.h index 8aa4b211..2ad87dfc 100644 --- a/dev/ese/src/noncore/interop/params.h +++ b/dev/ese/src/noncore/interop/params.h @@ -222,6 +222,7 @@ MSINTERNAL enum class MJET_PARAM HungIOActions = 182, // A set of actions to be taken on IOs that appear hung. MinDataForXpress = 183, // Smallest amount of data that should be compressed with xpress compression. 
EnableShrinkDatabase = 184, // Release space back to the OS when deleting data. This may require an OS feature of Sparse Files, and is subject to change. + Flight_CacheTraceSamplingRatio = 185, // Trace all cache events for 1 out of JET_paramFlight_CacheTraceSamplingRatio pages. ProcessFriendlyName = 186, // Friendly name for this instance of the process (e.g. performance counter global instance name, event logs). DurableCommitCallback = 187, // callback for when log is flushed EnableSqm = 188, // Deprecated / ignored param. diff --git a/dev/ese/src/os/oseventtrace.cxx b/dev/ese/src/os/oseventtrace.cxx index c613debd..5374a87e 100644 --- a/dev/ese/src/os/oseventtrace.cxx +++ b/dev/ese/src/os/oseventtrace.cxx @@ -267,3 +267,19 @@ INLINE BOOL FOSEventTraceEnabled() // return !fThanks; } +template< OSEventTraceKeywordGUID etguid > +INLINE BOOL FOSEventTraceKeywordEnabled() +{ + if ( g_fDisableTracingForced ) + { + return fFalse; + } + +#ifdef ESENT + MCGEN_TRACE_CONTEXT* p = &Microsoft_Windows_ESE_Context; +#else + MCGEN_TRACE_CONTEXT* p = &Microsoft_Exchange_ESE_Context; +#endif + + return ( etguid & p->MatchAnyKeyword ); +} From 1f51196503150b91f07cb068f4dd5edb1619525e Mon Sep 17 00:00:00 2001 From: Umair Ahmad Date: Fri, 2 Sep 2022 03:34:44 +0000 Subject: [PATCH 012/102] Modify ECC correction code to detect and repair bit flips on the checksum format flag. If old checksum format is detected and the checksums don't match, then rechecksum with new format and compare again. If the ECC checksum indicates a correctable error, then it must be the new checksum and the bit can be corrected. 
[Substrate:ae6b019edfe10005d3d3aeb9f3758cd0975ce848] --- dev/ese/src/_esefile/xsum.cxx | 43 ++++++++++++++++++++------ dev/ese/src/ese/_osu/checksumu.cxx | 49 ++++++++++++++++++++++-------- dev/ese/src/ese/checksum_test.cxx | 24 ++++++--------- 3 files changed, 80 insertions(+), 36 deletions(-) diff --git a/dev/ese/src/_esefile/xsum.cxx b/dev/ese/src/_esefile/xsum.cxx index 1f495a56..a3b7620a 100644 --- a/dev/ese/src/_esefile/xsum.cxx +++ b/dev/ese/src/_esefile/xsum.cxx @@ -102,19 +102,18 @@ static PAGECHECKSUM ChecksumFromPage( const void * const pv, const PAGETYPE page } // ================================================================ -static PAGECHECKSUM ComputePageChecksum( +LOCAL PAGECHECKSUM ComputePageChecksum_( const void* const pv, const UINT cb, const PAGETYPE pagetype, const ULONG pgno, - // set fNew to compute new ECC for a page (R/W wrt the large page!!) - // reset fNew to computer ECC for verification purpose (R/O wrt the page) - const BOOL fNew = fFalse ) + const BOOL fNewChecksumFormat, + const BOOL fWriteChecksum ) // ================================================================ { if( FPageHasLongChecksum( pagetype ) ) { - if( FPageHasNewChecksumFormat( pv, pagetype ) ) + if( fNewChecksumFormat ) { // large pages (16/32kiB) always have new checksum format PAGECHECKSUM pgChecksum; @@ -131,7 +130,7 @@ static PAGECHECKSUM ComputePageChecksum( // write checksums into designated location in header block // so checksum for header block can protect them as well - if ( fNew ) + if ( fWriteChecksum ) { // cast RO ( const void* ) to RW ( PGHDR2* ) PGHDR2* const pPgHdr2 = ( PGHDR2* )pv; @@ -154,6 +153,18 @@ static PAGECHECKSUM ComputePageChecksum( return ChecksumOldFormat((unsigned char *)pv, cb); } +// ================================================================ +static PAGECHECKSUM ComputePageChecksum( + const void* const pv, + const UINT cb, + const PAGETYPE pagetype, + const ULONG pgno, + const BOOL fWriteChecksum = fFalse ) +// 
================================================================ +{ + return ComputePageChecksum_( pv, cb, pagetype, pgno, FPageHasNewChecksumFormat( pv, pagetype ), fWriteChecksum ); +} + // ================================================================ inline void FlipBit( void * const pv, const INT ibitOffset ) // ================================================================ @@ -215,7 +226,7 @@ static void TryFixPage( const UINT cblk = fSmallPage ? 1 : cxeChecksumPerPage; XECHECKSUMERROR rgErr[ cxeChecksumPerPage ] = { xeChecksumNoError, }; - UINT rgibitCorrupted[ cxeChecksumPerPage ] = { IbitNewChecksumFormatFlag( pagetype ), UINT_MAX, UINT_MAX, UINT_MAX, }; + UINT rgibitCorrupted[ cxeChecksumPerPage ] = { UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX, }; UINT ibitCorrupted = UINT_MAX; // work out correction @@ -292,9 +303,23 @@ void ChecksumAndPossiblyFixPage( *pchecksumActual = ComputePageChecksum( pv, cb, pagetype, pgno ); const BOOL fNewChecksumFormat = FPageHasNewChecksumFormat( pv, pagetype ); - if( *pchecksumActual != *pchecksumExpected && fNewChecksumFormat ) + if ( *pchecksumActual != *pchecksumExpected && *pchecksumExpected != PAGECHECKSUM{ 0 } ) { - TryFixPage( pv, cb, pagetype, fCorrectError, pfCorrectableError, pibitCorrupted, *pchecksumExpected, *pchecksumActual ); + // Try correcting bit flips in the page for non-zero pages. (Pages whose checksum isn't 0). + // Note that a valid old format checksum can be 0 for some combination of non-zero bits on the page. + // A valid new checksum can't be zero because it comprises of two complimentary checksums, + // both of which can only be 0 if all of the bits on the page are 0 or 1. + // See checksum_amd64.cxx for a detailed description of why that is true. + + // Old checksum format doesn't support error correction. + // But it could be that the checksum was new format and the bit that indicated formats got flipped. + // Compute the checksum as new format and try fixing it. 
+ // If it is fixable, then we know that fNewChecksumFormat bit on the page got flipped. + PAGECHECKSUM checksumNewFormat = fNewChecksumFormat ? + *pchecksumActual : + ComputePageChecksum_( pv, cb, pagetype, pgno, fTrue, fFalse ); + + TryFixPage( pv, cb, pagetype, fCorrectError, pfCorrectableError, pibitCorrupted, *pchecksumExpected, checksumNewFormat ); Assert( ( *pfCorrectableError && *pibitCorrupted != -1 ) || ( !*pfCorrectableError && *pibitCorrupted == -1 ) ); // no point in re-computing the checksum if we haven't done any changes diff --git a/dev/ese/src/ese/_osu/checksumu.cxx b/dev/ese/src/ese/_osu/checksumu.cxx index c124d757..d821aa77 100644 --- a/dev/ese/src/ese/_osu/checksumu.cxx +++ b/dev/ese/src/ese/_osu/checksumu.cxx @@ -108,8 +108,8 @@ UINT IbitNewChecksumFormatFlag( const PAGETYPE pagetype ) // for database pages, the page flags are stored in the 10th // unsigned long. The format bit is 0x2000, which is the 14th bit // - Assert( OffsetOf( CPAGE::PGHDR, fFlags ) * 8 == 9 * 32 ); - Assert( CPAGE::fPageNewChecksumFormat == ( 1 << 13 ) ); + static_assert( OffsetOf( CPAGE::PGHDR, fFlags ) * 8 == 9 * 32 ); + static_assert( CPAGE::fPageNewChecksumFormat == ( 1 << 13 ) ); return ( 9 * 32 ) + 13; } @@ -267,19 +267,18 @@ ULONG CbBlockSize( const ULONG cb ) } // ================================================================ -static PAGECHECKSUM ComputePageChecksum( +LOCAL PAGECHECKSUM ComputePageChecksum_( const void* const pv, const UINT cb, const PAGETYPE pagetype, const ULONG pgno, - // set fNew to compute new ECC for a page (R/W wrt the large page!!) 
- // reset fNew to computer ECC for verification purpose (R/O wrt the page) - const BOOL fNew = fFalse ) + const BOOL fNewChecksumFormat, + const BOOL fWriteChecksum ) // ================================================================ { if( FPageHasLongChecksum( pagetype ) ) { - if( FPageHasNewChecksumFormat( pv, pagetype ) ) + if( fNewChecksumFormat ) { // large pages (16/32kiB) always have new checksum format PAGECHECKSUM pgChecksum; @@ -311,7 +310,7 @@ static PAGECHECKSUM ComputePageChecksum( // write checksums into designated location in header block // so checksum for header block can protect them as well - if ( fNew ) + if ( fWriteChecksum ) { // cast RO pv to RW pPgHdr2 CPAGE::PGHDR2* const pPgHdr2 = ( CPAGE::PGHDR2* )pv; @@ -334,6 +333,18 @@ static PAGECHECKSUM ComputePageChecksum( return ChecksumOldFormat((unsigned char *)pv, cb); } +// ================================================================ +static PAGECHECKSUM ComputePageChecksum( + const void* const pv, + const UINT cb, + const PAGETYPE pagetype, + const ULONG pgno, + const BOOL fWriteChecksum = fFalse ) +// ================================================================ +{ + return ComputePageChecksum_( pv, cb, pagetype, pgno, FPageHasNewChecksumFormat( pv, pagetype ), fWriteChecksum ); +} + // ================================================================ enum XECHECKSUMERROR { xeChecksumNoError = 0, xeChecksumCorrectableError = -13, xeChecksumFatalError = -29, }; @@ -383,7 +394,7 @@ static void TryFixPage( UINT ibT = 0; XECHECKSUMERROR rgErr[ cxeChecksumPerPage ] = { xeChecksumNoError, }; - UINT rgibitCorrupted[ cxeChecksumPerPage ] = { IbitNewChecksumFormatFlag( pagetype ), UINT_MAX, UINT_MAX, UINT_MAX, }; + UINT rgibitCorrupted[ cxeChecksumPerPage ] = { UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX, }; UINT ibitCorrupted = UINT_MAX; // work out correction @@ -488,11 +499,25 @@ void ChecksumAndPossiblyFixPage( *pchecksumExpected = ChecksumFromPage( pv, cb, pagetype ); *pchecksumActual = 
ComputePageChecksum( pv, cb, pagetype, pgno ); - + const BOOL fNewChecksumFormat = FPageHasNewChecksumFormat( pv, pagetype ); - if( *pchecksumActual != *pchecksumExpected && fNewChecksumFormat ) + if ( *pchecksumActual != *pchecksumExpected && *pchecksumExpected != PAGECHECKSUM{ 0 } ) { - TryFixPage( pv, cb, pagetype, fCorrectError, pfCorrectableError, pibitCorrupted, *pchecksumExpected, *pchecksumActual ); + // Try correcting bit flips in the page for non-zero pages. (Pages whose checksum isn't 0). + // Note that a valid old format checksum can be 0 for some combination of non-zero bits on the page. + // A valid new checksum can't be zero because it comprises of two complimentary checksums, + // both of which can only be 0 if all of the bits on the page are 0 or 1. + // See checksum_amd64.cxx for a detailed description of why that is true. + + // Old checksum format doesn't support error correction. + // But it could be that the checksum was new format and the bit that indicated formats got flipped. + // Compute the checksum as new format and try fixing it. + // If it is fixable, then we know that fNewChecksumFormat bit on the page got flipped. + PAGECHECKSUM checksumNewFormat = fNewChecksumFormat ? + *pchecksumActual : + ComputePageChecksum_( pv, cb, pagetype, pgno, fTrue, fFalse ); + + TryFixPage( pv, cb, pagetype, fCorrectError, pfCorrectableError, pibitCorrupted, *pchecksumExpected, checksumNewFormat ); Assert( ( *pfCorrectableError && *pibitCorrupted != -1 ) || ( !*pfCorrectableError && *pibitCorrupted == -1 ) ); // no point in re-computing the checksum if we haven't done any changes diff --git a/dev/ese/src/ese/checksum_test.cxx b/dev/ese/src/ese/checksum_test.cxx index c5d17530..41dd3218 100644 --- a/dev/ese/src/ese/checksum_test.cxx +++ b/dev/ese/src/ese/checksum_test.cxx @@ -46,9 +46,9 @@ PAGECHECKSUM ComputePageChecksum( const UINT cb, const PAGETYPE pagetype, const ULONG pgno, - // set fNew to compute new ECC for a page (R/W wrt the large page!!) 
- // reset fNew to computer ECC for verification purpose (R/O wrt the page) - const BOOL fNew = fFalse ); + // set fWriteChecksum to compute new ECC for a page (R/W wrt the large page!!) + // reset fWriteChecksum to computer ECC for verification purpose (R/O wrt the page) + const BOOL fWriteChecksum = fFalse ); // @@ -436,6 +436,7 @@ static void TestSetAndChecksum( unsigned char * const pb ) TestFixOnePage( pb, 8192, databasePage, 129 ); TestFixOnePage( pb, 8192, databasePage, 3097 ); TestFixOnePage( pb, 8192, databasePage, ( 8192 * 8 ) - 1 ); + TestFixOnePage( pb, 8192, databasePage, IbitNewChecksumFormatFlag( databasePage ) ); // we can't deal with a corruption in the first checksum or the format // flag. to avoid that, don't corrupt the v1 header at all (the v1 header @@ -464,6 +465,7 @@ static void TestSetAndChecksum( unsigned char * const pb ) TestFixOnePage( pb, 4096, databasePage, 29000 ); TestFixOnePage( pb, 4096, databasePage, 30009 ); TestFixOnePage( pb, 4096, databasePage, ( 4096 * 8 ) - 1 ); + TestFixOnePage( pb, 4096, databasePage, IbitNewChecksumFormatFlag( databasePage ) ); // we can't deal with a corruption in the checksum or the format flag. 
to // avoid that, don't corrupt the header at all (the header is 40 bytes) @@ -473,11 +475,8 @@ static void TestSetAndChecksum( unsigned char * const pb ) // single-bit corruptions ECC can't fix TestFailToFixOnePage( pb, 8192, databasePage, 0 ); - TestFailToFixOnePage( pb, 8192, databasePage, IbitNewChecksumFormatFlag( databasePage ) ); - TestFailToFixOnePage( pb, 4096, databasePage, 1 ); - TestFailToFixOnePage( pb, 4096, databasePage, IbitNewChecksumFormatFlag( databasePage ) ); - + // single-bit corruptions on pages without ECC TestFailToFixOnePage( pb, 8192, databaseHeader, 100 ); @@ -886,12 +885,10 @@ VOID ExtensiveKnownPageUnitTest( __out_bcount( cbSizeMax ) unsigned char * const { cbit++; - // we currently cannot fix bit flips on the checksum format flag // we currently cannot fix bit flips on the first block's checksum const INT fBitBelongsToFirstChecksum = ( ibit / 8 ) < sizeof( XECHECKSUM ); - const INT fBitIsChecksumFormatFlag = ibit == (INT)IbitNewChecksumFormatFlag( databasePage ); - const INT fBitFixableSingleBitError = !( fBitBelongsToFirstChecksum || fBitIsChecksumFormatFlag ); + const INT fBitFixableSingleBitError = !fBitBelongsToFirstChecksum; // flip one bit @@ -971,15 +968,12 @@ VOID ExtensiveKnownPageUnitTest( __out_bcount( cbSizeMax ) unsigned char * const { cbit++; - // we currently cannot fix bit flips on the checksum format flag // we currently cannot fix bit flips on the first block's checksum const INT fBit1BelongsToFirstChecksum = ( ibit1 / 8 ) < sizeof( XECHECKSUM ); - const INT fBit1IsChecksumFormatFlag = ibit1 == (INT)IbitNewChecksumFormatFlag( databasePage ); - const INT fBit1FixableSingleBitError = !( fBit1BelongsToFirstChecksum || fBit1IsChecksumFormatFlag ); + const INT fBit1FixableSingleBitError = !fBit1BelongsToFirstChecksum; const INT fBit2BelongsToFirstChecksum = ( ibit2 / 8 ) < sizeof( XECHECKSUM ); - const INT fBit2IsChecksumFormatFlag = ibit2 == (INT)IbitNewChecksumFormatFlag( databasePage ); - const INT 
fBit2FixableSingleBitError = !( fBit2BelongsToFirstChecksum || fBit2IsChecksumFormatFlag ); + const INT fBit2FixableSingleBitError = !fBit2BelongsToFirstChecksum; // flip bits From 4428d96d53fed4f077b500783b29ec3e47ea56dc Mon Sep 17 00:00:00 2001 From: TAW Date: Wed, 7 Sep 2022 23:22:52 +0000 Subject: [PATCH 013/102] Change jethdr.W to wrap base types Modify jethdr.W to not use base types like "long" or "wchar_t", but rather use wrapped base types like JET_UINT32 and JET_WCHAR and JET_PSTR/JET_PWSTR where appropriate. This abstraction will be used when compiling for Linux, where those two types ("long" and "wchar_t") are different. Also, fixed a bunch of indentation/formatting. For example, normalized placement of the "*" character when defining a pointer param, some use of single-line comments, etc. [Substrate:34324ee3abf5f10ba83f11fce5ec21758cd62263] --- dev/ese/published/inc/jethdr.w | 3746 ++++++++++++++++---------------- 1 file changed, 1883 insertions(+), 1863 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index 66297b67..c707e8df 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -66,34 +66,54 @@ extern "C" { #define JET_API __stdcall #define JET_NODSAPI __stdcall -// end_PubEsent -// -// UNDONE: should we just remove this redefinition and -// include basetsd.h (then typedef JET_API_PTR to ULONG_PTR)?? -// -// begin_PubEsent +#ifndef _JET_BASE_TYPES_DEFINED +#define _JET_BASE_TYPES_DEFINED +// Note the use of "long" rather than "int" for JET_INT32/JET_UINT32. +// The JET_API has historically used the base type "long" for 32bit integral +// types. While "int" and "long" are both 32bit integral types and coerce +// back and forth and so may be used interchangably, "int *" and "long *" do +// not. If the base type of the 32bit integral types were to change to +// "int", existing client code could break and require casts for the pointer types.
+typedef char JET_INT8; +typedef unsigned char JET_UINT8; +typedef short JET_INT16; +typedef unsigned short JET_UINT16; +typedef long JET_INT32; +typedef unsigned long JET_UINT32; +typedef long long JET_INT64; +typedef unsigned long long JET_UINT64; +typedef unsigned char JET_BYTE; +typedef void JET_VOID; +typedef void * JET_PVOID; +typedef const void * JET_PCVOID; +typedef char JET_CHAR; +#if !defined(_NATIVE_WCHAR_T_DEFINED) +typedef unsigned short JET_WCHAR; +#else +typedef wchar_t JET_WCHAR; +#endif +#endif // _JET_BASE_TYPES_DEFINED + #if defined(_WIN64) - typedef unsigned __int64 JET_API_PTR; -#elif !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) - typedef __w64 unsigned long JET_API_PTR; + typedef JET_UINT64 JET_API_PTR; #else - typedef unsigned long JET_API_PTR; + typedef JET_UINT32 JET_API_PTR; #endif -typedef _Return_type_success_( return >= 0 ) long JET_ERR; +typedef _Return_type_success_( return >= 0 ) JET_INT32 JET_ERR; // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) -typedef unsigned long JET_ENGINEFORMATVERSION; /* efv - engine format version specification */ +typedef JET_UINT32 JET_ENGINEFORMATVERSION; // efv - engine format version specification #endif // JET_VERSION >= 0x0A01 // begin_PubEsent -typedef JET_API_PTR JET_HANDLE; /* backup file handle */ -typedef JET_API_PTR JET_INSTANCE; /* Instance Identifier */ -typedef JET_API_PTR JET_SESID; /* Session Identifier */ -typedef JET_API_PTR JET_TABLEID; /* Table Identifier */ +typedef JET_API_PTR JET_HANDLE; // backup file handle +typedef JET_API_PTR JET_INSTANCE; // Instance Identifier +typedef JET_API_PTR JET_SESID; // Session Identifier +typedef JET_API_PTR JET_TABLEID; // Table Identifier #if ( JET_VERSION >= 0x0501 ) -typedef JET_API_PTR JET_LS; /* Local Storage */ +typedef JET_API_PTR JET_LS; // Local Storage #endif // JET_VERSION >= 0x0501 // end_PubEsent #if ( JET_VERSION >= 0x0601 ) @@ -101,37 +121,37 @@ typedef JET_API_PTR JET_HISTO; #endif // JET_VERSION >= 0x0601 // begin_PubEsent 
-typedef unsigned long JET_COLUMNID; /* Column Identifier */ +typedef JET_UINT32 JET_COLUMNID; // Column Identifier typedef struct tagJET_INDEXID { - unsigned long cbStruct; - unsigned char rgbIndexId[sizeof(JET_API_PTR)+sizeof(unsigned long)+sizeof(unsigned long)]; + JET_UINT32 cbStruct; + JET_BYTE rgbIndexId[sizeof(JET_API_PTR)+sizeof(JET_UINT32)+sizeof(JET_UINT32)]; } JET_INDEXID; -typedef unsigned long JET_DBID; /* Database Identifier */ -typedef unsigned long JET_OBJTYP; /* Object Type */ -typedef unsigned long JET_COLTYP; /* Column Type */ -typedef unsigned long JET_GRBIT; /* Group of Bits */ +typedef JET_UINT32 JET_DBID; // Database Identifier +typedef JET_UINT32 JET_OBJTYP; // Object Type +typedef JET_UINT32 JET_COLTYP; // Column Type +typedef JET_UINT32 JET_GRBIT; // Group of Bits -typedef unsigned long JET_SNP; /* Status Notification Process */ -typedef unsigned long JET_SNT; /* Status Notification Type */ +typedef JET_UINT32 JET_SNP; // Status Notification Process +typedef JET_UINT32 JET_SNT; // Status Notification Type // end_PubEsent -typedef unsigned long JET_SNC; /* Status Notification Code */ +typedef JET_UINT32 JET_SNC; // Status Notification Code // begin_PubEsent -typedef double JET_DATESERIAL; /* JET_coltypDateTime format */ +typedef double JET_DATESERIAL; // JET_coltypDateTime format // end_PubEsent -typedef unsigned long JET_DLLID; /* ID of DLL for hook functions */ +typedef JET_UINT32 JET_DLLID; // ID of DLL for hook functions // begin_PubEsent #if ( JET_VERSION >= 0x0501 ) -typedef unsigned long JET_CBTYP; /* Callback Types */ +typedef JET_UINT32 JET_CBTYP; // Callback Types #endif // JET_VERSION >= 0x0501 -typedef JET_ERR (JET_API *JET_PFNSTATUS)( - _In_ JET_SESID sesid, - _In_ JET_SNP snp, - _In_ JET_SNT snt, - _In_opt_ void * pv ); +typedef JET_ERR (JET_API * JET_PFNSTATUS)( + _In_ JET_SESID sesid, + _In_ JET_SNP snp, + _In_ JET_SNT snt, + _In_opt_ JET_PVOID pv ); // end_PubEsent @@ -141,37 +161,32 @@ typedef JET_ERR (JET_API 
*JET_PFNSTATUS)( // it has a user-provided context and eliminates the unused sesid // parameter. typedef JET_ERR (JET_API * JET_PFNINITCALLBACK)( - _In_ JET_SNP snp, - _In_ JET_SNT snt, - _In_opt_ void * pv, // depends on the snp, snt - _In_opt_ void * pvContext ); // provided in JetInit4 + _In_ JET_SNP snp, + _In_ JET_SNT snt, + _In_opt_ JET_PVOID pv, // depends on the snp, snt + _In_opt_ JET_PVOID pvContext ); // provided in JetInit4 #endif // JET_VERSION >= 0x0A01 // begin_PubEsent -#if !defined(_NATIVE_WCHAR_T_DEFINED) -typedef unsigned short WCHAR; -#else -typedef wchar_t WCHAR; -#endif +typedef _Null_terminated_ JET_CHAR * JET_PSTR; // ASCII string (char *) null terminated +typedef _Null_terminated_ const JET_CHAR * JET_PCSTR; // const ASCII string (char *) null terminated +typedef _Null_terminated_ JET_WCHAR * JET_PWSTR; // Unicode string (wchar_t *) null terminated +typedef _Null_terminated_ const JET_WCHAR * JET_PCWSTR; // const Unicode string (wchar_t *) null terminated -typedef _Null_terminated_ char * JET_PSTR; /* ASCII string (char *) null terminated */ -typedef _Null_terminated_ const char * JET_PCSTR; /* const ASCII string (char *) null terminated */ -typedef _Null_terminated_ WCHAR * JET_PWSTR; /* Unicode string (char *) null terminated */ -typedef _Null_terminated_ const WCHAR * JET_PCWSTR; /* const Unicode string (char *) null terminated */ typedef struct { - char *szDatabaseName; - char *szNewDatabaseName; -} JET_RSTMAP_A; /* restore map */ + JET_PSTR szDatabaseName; + JET_PSTR szNewDatabaseName; +} JET_RSTMAP_A; // restore map typedef struct { - WCHAR *szDatabaseName; - WCHAR *szNewDatabaseName; -} JET_RSTMAP_W; /* restore map */ + JET_PWSTR szDatabaseName; + JET_PWSTR szNewDatabaseName; +} JET_RSTMAP_W; // restore map #ifdef JET_UNICODE #define JET_RSTMAP JET_RSTMAP_W @@ -185,32 +200,32 @@ typedef struct typedef struct tagJET_SETDBPARAM { - unsigned long dbparamid; // One of the JET_dbparams. + JET_UINT32 dbparamid; // One of the JET_dbparams. 
- _Field_size_bytes_( cbParam ) void * pvParam; // Address of the value of the parameter. Note that even for integral types, a valid - // memory location must be passed, as opposed to the numerical value cast to a void*. + _Field_size_bytes_( cbParam ) JET_PVOID pvParam; // Address of the value of the parameter. Note that even for integral types, a valid + // memory location must be passed, as opposed to the numerical value cast to a PVOID. - unsigned long cbParam; // The size of the data, in bytes, pointed to by pvParam. + JET_UINT32 cbParam; // The size of the data, in bytes, pointed to by pvParam. } JET_SETDBPARAM; typedef struct { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szDatabaseName; // (optional) original database path - char *szNewDatabaseName; // new database path - _Field_size_opt_( csetdbparam ) JET_SETDBPARAM *rgsetdbparam; // (optional) array of database parameters - unsigned long csetdbparam; // number of elements in rgsetdbparam - JET_GRBIT grbit; // recovery options + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szDatabaseName; // (optional) original database path + JET_PSTR szNewDatabaseName; // new database path + _Field_size_opt_( csetdbparam ) JET_SETDBPARAM * rgsetdbparam; // (optional) array of database parameters + JET_UINT32 csetdbparam; // number of elements in rgsetdbparam + JET_GRBIT grbit; // recovery options } JET_RSTMAP2_A; typedef struct { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szDatabaseName; // (optional) original database path - WCHAR *szNewDatabaseName; // new database path - _Field_size_opt_( csetdbparam ) JET_SETDBPARAM *rgsetdbparam; // (optional) array of database parameters - unsigned long csetdbparam; // number of elements in rgsetdbparam - JET_GRBIT grbit; // recovery options + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szDatabaseName; // (optional) original 
database path + JET_PWSTR szNewDatabaseName; // new database path + _Field_size_opt_( csetdbparam ) JET_SETDBPARAM * rgsetdbparam; // (optional) array of database parameters + JET_UINT32 csetdbparam; // number of elements in rgsetdbparam + JET_GRBIT grbit; // recovery options } JET_RSTMAP2_W; #ifdef JET_UNICODE @@ -227,26 +242,26 @@ typedef struct typedef struct tagCONVERT_A { - char *szOldDll; + JET_PSTR szOldDll; union { - unsigned long fFlags; + JET_UINT32 fFlags; struct { - unsigned long fSchemaChangesOnly:1; + JET_UINT32 fSchemaChangesOnly:1; }; }; } JET_CONVERT_A; typedef struct tagCONVERT_W { - WCHAR *szOldDll; + JET_PWSTR szOldDll; union { - unsigned long fFlags; + JET_UINT32 fFlags; struct { - unsigned long fSchemaChangesOnly:1; + JET_UINT32 fSchemaChangesOnly:1; }; }; } JET_CONVERT_W; @@ -310,7 +325,7 @@ typedef enum typedef struct tagDBUTIL_A { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_SESID sesid; JET_DBID dbid; @@ -329,51 +344,51 @@ typedef struct tagDBUTIL_A // legacy elements struct { - char *szDatabase; - char *szSLV_ObsoleteAndUnused; // No longer used. Left in to preserve the subsequent values; - char *szBackup; - const char *szTable; - const char *szIndex; - char *szIntegPrefix; + JET_PSTR szDatabase; + JET_PSTR szSLV_ObsoleteAndUnused; // No longer used. Left in to preserve the subsequent values; + JET_PSTR szBackup; + JET_PCSTR szTable; + JET_PCSTR szIndex; + JET_PSTR szIntegPrefix; - long pgno; - long iline; + JET_INT32 pgno; + JET_INT32 iline; - long lGeneration; - long isec; - long ib; + JET_INT32 lGeneration; + JET_INT32 isec; + JET_INT32 ib; - long cRetry; + JET_INT32 cRetry; - void * pfnCallback; - void * pvCallback; + JET_PVOID pfnCallback; + JET_PVOID pvCallback; }; // ChecksumLogFromMemory struct { - char *szLog; // Name of the Log file - char *szBase; // Base name used e.g. 
"edb" or "E01" - void *pvBuffer; // Pointer to buffer containing the log - long cbBuffer; // Length of buffer + JET_PSTR szLog; // Name of the Log file + JET_PSTR szBase; // Base name used e.g. "edb" or "E01" + JET_PVOID pvBuffer; // Pointer to buffer containing the log + JET_INT32 cbBuffer; // Length of buffer } checksumlogfrommemory; // opDBUTILDumpSpaceCategory struct { - char *szDatabase; // Database from which to dump the space category of pages. - unsigned long pgnoFirst; // First page to dump the category for. The first page in the database is 1. - unsigned long pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (unsigned long)-1. - void *pfnSpaceCatCallback; // Callback to receive each page's category (JET_SPCATCALLBACK). - void *pvContext; // General purpose context which is passed back to the client callback (pfnSpaceCatCallback). + JET_PSTR szDatabase; // Database from which to dump the space category of pages. + JET_UINT32 pgnoFirst; // First page to dump the category for. The first page in the database is 1. + JET_UINT32 pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (JET_UINT32)-1. + JET_PVOID pfnSpaceCatCallback; // Callback to receive each page's category (JET_SPCATCALLBACK). + JET_PVOID pvContext; // General purpose context which is passed back to the client callback (pfnSpaceCatCallback). } spcatOptions; // opDBUTILDumpRBS struct { - char *szDatabase; // Database from which to dump the space category of pages. - unsigned long pgnoFirst; // First page to dump the category for. The first page in the database is 1. - unsigned long pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (unsigned long)-1. + JET_PSTR szDatabase; // Database from which to dump the space category of pages. + JET_UINT32 pgnoFirst; // First page to dump the category for. The first page in the database is 1. 
+ JET_UINT32 pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (JET_UINT32)-1. } rbsOptions; }; @@ -382,7 +397,7 @@ typedef struct tagDBUTIL_A typedef struct tagDBUTIL_W { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_SESID sesid; JET_DBID dbid; @@ -401,51 +416,51 @@ typedef struct tagDBUTIL_W // legacy elements struct { - WCHAR *szDatabase; - WCHAR *szSLV_ObsoleteAndUnused; // No longer used. Left in to preserve the subsequent values; - WCHAR *szBackup; - const WCHAR *szTable; - const WCHAR *szIndex; - WCHAR *szIntegPrefix; + JET_PWSTR szDatabase; + JET_PWSTR szSLV_ObsoleteAndUnused; // No longer used. Left in to preserve the subsequent values; + JET_PWSTR szBackup; + JET_PCWSTR szTable; + JET_PCWSTR szIndex; + JET_PWSTR szIntegPrefix; - long pgno; - long iline; + JET_INT32 pgno; + JET_INT32 iline; - long lGeneration; - long isec; - long ib; + JET_INT32 lGeneration; + JET_INT32 isec; + JET_INT32 ib; - long cRetry; + JET_INT32 cRetry; - void *pfnCallback; - void *pvCallback; + JET_PVOID pfnCallback; + JET_PVOID pvCallback; }; // ChecksumLogFromMemory struct { - WCHAR *szLog; // Name of the Log file - WCHAR *szBase; // Base name used e.g. "edb" or "E01" - void *pvBuffer; // Pointer to buffer containing the log - long cbBuffer; // Length of buffer + JET_PWSTR szLog; // Name of the Log file + JET_PWSTR szBase; // Base name used e.g. "edb" or "E01" + JET_PVOID pvBuffer; // Pointer to buffer containing the log + JET_INT32 cbBuffer; // Length of buffer } checksumlogfrommemory; // opDBUTILDumpSpaceCategory struct { - WCHAR *szDatabase; // Database from which to dump the space category of pages. - unsigned long pgnoFirst; // First page to dump the category for. The first page in the database is 1. - unsigned long pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (unsigned long)-1. 
- void *pfnSpaceCatCallback; // Callback to receive each page's category (JET_SPCATCALLBACK). - void *pvContext; // General purpose context. + JET_PWSTR szDatabase; // Database from which to dump the space category of pages. + JET_UINT32 pgnoFirst; // First page to dump the category for. The first page in the database is 1. + JET_UINT32 pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (JET_UINT32)-1. + JET_PVOID pfnSpaceCatCallback; // Callback to receive each page's category (JET_SPCATCALLBACK). + JET_PVOID pvContext; // General purpose context. } spcatOptions; // opDBUTILDumpRBS struct { - WCHAR *szDatabase; // Database from which to dump the space category of pages. - unsigned long pgnoFirst; // First page to dump the category for. The first page in the database is 1. - unsigned long pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (unsigned long)-1. + JET_PWSTR szDatabase; // Database from which to dump the space category of pages. + JET_UINT32 pgnoFirst; // First page to dump the category for. The first page in the database is 1. + JET_UINT32 pgnoLast; // Last page to dump the category for. The last page in the database can be passed in as (JET_UINT32)-1. } rbsOptions; }; @@ -490,13 +505,17 @@ typedef enum } SpaceCategoryFlags; // Callback used by opDBUTILDumpSpaceCategory to return page space categories. 
-typedef void (JET_API *JET_SPCATCALLBACK)( _In_ const unsigned long pgno, _In_ const unsigned long objid, _In_ const SpaceCategoryFlags spcatf, _In_opt_ void* const pvContext ); +typedef JET_VOID (JET_API * JET_SPCATCALLBACK)( + _In_ const JET_UINT32 pgno, + _In_ const JET_UINT32 objid, + _In_ const SpaceCategoryFlags spcatf, + _In_opt_ const JET_PVOID pvContext ); #endif // JET_VERSION >= 0x0A01 // DBUTIL_OP op = opDBUTILDumpSpace // #define JET_bitDBUtilSpaceInfoBasicCatalog 0x00000001 -#define JET_bitDBUtilSpaceInfoSpaceTrees 0x00000002 +#define JET_bitDBUtilSpaceInfoSpaceTrees 0x00000002 #define JET_bitDBUtilSpaceInfoParentOfLeaf 0x00000004 #define JET_bitDBUtilSpaceInfoFullWalk 0x00000008 // This command also utilizes this option: @@ -763,15 +782,15 @@ typedef void (JET_API *JET_SPCATCALLBACK)( _In_ const unsigned long pgno, _In_ c /* Callback-function prototype */ -typedef JET_ERR (JET_API *JET_CALLBACK)( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_TABLEID tableid, - _In_ JET_CBTYP cbtyp, - _Inout_opt_ void * pvArg1, - _Inout_opt_ void * pvArg2, - _In_opt_ void * pvContext, - _In_ JET_API_PTR ulUnused ); +typedef JET_ERR (JET_API * JET_CALLBACK)( + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_TABLEID tableid, + _In_ JET_CBTYP cbtyp, + _Inout_opt_ JET_PVOID pvArg1, + _Inout_opt_ JET_PVOID pvArg2, + _In_opt_ JET_PVOID pvContext, + _In_ JET_API_PTR ulUnused ); #endif // JET_VERSION >= 0x0501 // end_PubEsent @@ -795,18 +814,18 @@ typedef JET_ERR (JET_API *JET_CALLBACK)( // // currently UNSUPPORTED // -typedef JET_ERR (JET_API *JET_ABORTRETRYFAILCALLBACK_A)( - _In_ char * szFile, - _In_ unsigned long Offset, - _In_ unsigned long OffsetHigh, - _In_ unsigned long Length, +typedef JET_ERR (JET_API * JET_ABORTRETRYFAILCALLBACK_A)( + _In_ JET_PSTR szFile, + _In_ JET_UINT32 Offset, + _In_ JET_UINT32 OffsetHigh, + _In_ JET_UINT32 Length, _In_ JET_ERR err ); -typedef JET_ERR (JET_API *JET_ABORTRETRYFAILCALLBACK_W)( - _In_ WCHAR * szFile, - _In_ 
unsigned long Offset, - _In_ unsigned long OffsetHigh, - _In_ unsigned long Length, +typedef JET_ERR (JET_API * JET_ABORTRETRYFAILCALLBACK_W)( + _In_ JET_PWSTR szFile, + _In_ JET_UINT32 Offset, + _In_ JET_UINT32 OffsetHigh, + _In_ JET_UINT32 Length, _In_ JET_ERR err ); #ifdef JET_UNICODE @@ -899,12 +918,12 @@ typedef enum // // tracing callbacks // -typedef void (JET_API *JET_PFNTRACEEMIT)( +typedef JET_VOID (JET_API * JET_PFNTRACEEMIT)( _In_ const JET_TRACETAG tag, _In_ JET_PCSTR szPrefix, _In_ JET_PCSTR szTrace, _In_ const JET_API_PTR ul ); -typedef void (JET_API *JET_PFNTRACEREGISTER)( +typedef JET_VOID (JET_API * JET_PFNTRACEREGISTER)( _In_ const JET_TRACETAG tag, _In_ JET_PCSTR szDesc, _Out_ JET_API_PTR * pul ); @@ -938,10 +957,10 @@ typedef enum #if ( JET_VERSION >= 0x0600 ) typedef struct JET_SESSIONINFO { - unsigned long ulTrxBegin0; - unsigned long ulTrxLevel; - unsigned long ulProcid; - unsigned long ulFlags; + JET_UINT32 ulTrxBegin0; + JET_UINT32 ulTrxLevel; + JET_UINT32 ulProcid; + JET_UINT32 ulFlags; JET_API_PTR ulTrxContext; } JET_SESSIONINFO; #endif // JET_VERSION >= 0x0600 @@ -949,46 +968,46 @@ typedef struct JET_SESSIONINFO /* Status Notification Structures */ -typedef struct /* Status Notification Progress */ +typedef struct // Status Notification Progress { - unsigned long cbStruct; /* Size of this structure */ - unsigned long cunitDone; /* Number of units of work completed */ - unsigned long cunitTotal; /* Total number of units of work */ + JET_UINT32 cbStruct; // Size of this structure + JET_UINT32 cunitDone; // Number of units of work completed + JET_UINT32 cunitTotal; // Total number of units of work } JET_SNPROG; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; - unsigned long cbFilesizeLow; // file's current size (low DWORD) - unsigned long cbFilesizeHigh; // file's current size (high DWORD) + JET_UINT32 cbFilesizeLow; // file's current size (low DWORD) + JET_UINT32 cbFilesizeHigh; // file's current size (high DWORD) - 
unsigned long cbFreeSpaceRequiredLow; // estimate of free disk space required for in-place upgrade (low DWORD) - unsigned long cbFreeSpaceRequiredHigh;// estimate of free disk space required for in-place upgrade (high DWORD) + JET_UINT32 cbFreeSpaceRequiredLow; // estimate of free disk space required for in-place upgrade (low DWORD) + JET_UINT32 cbFreeSpaceRequiredHigh;// estimate of free disk space required for in-place upgrade (high DWORD) - unsigned long csecToUpgrade; // estimate of time required, in seconds, for upgrade + JET_UINT32 csecToUpgrade; // estimate of time required, in seconds, for upgrade union { - unsigned long ulFlags; + JET_UINT32 ulFlags; struct { - unsigned long fUpgradable:1; - unsigned long fAlreadyUpgraded:1; + JET_UINT32 fUpgradable:1; + JET_UINT32 fAlreadyUpgraded:1; }; }; } JET_DBINFOUPGRADE; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_OBJTYP objtyp; JET_DATESERIAL dtCreate; // Deprecated. JET_DATESERIAL dtUpdate; // Deprecated. JET_GRBIT grbit; - unsigned long flags; - unsigned long cRecord; - unsigned long cPage; + JET_UINT32 flags; + JET_UINT32 cRecord; + JET_UINT32 cPage; } JET_OBJECTINFO; /* The following flags appear in the grbit field above */ @@ -1014,9 +1033,9 @@ typedef struct typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_TABLEID tableid; - unsigned long cRecord; + JET_UINT32 cRecord; JET_COLUMNID columnidcontainername; JET_COLUMNID columnidobjectname; JET_COLUMNID columnidobjtyp; @@ -1024,17 +1043,17 @@ typedef struct JET_COLUMNID columniddtUpdate; // XXX -- to be deleted JET_COLUMNID columnidgrbit; JET_COLUMNID columnidflags; - JET_COLUMNID columnidcRecord; /* Level 2 info */ - JET_COLUMNID columnidcPage; /* Level 2 info */ + JET_COLUMNID columnidcRecord; // Level 2 info + JET_COLUMNID columnidcPage; // Level 2 info } JET_OBJECTLIST; #define cObjectInfoCols 9 typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_TABLEID tableid; - unsigned long cRecord; + 
JET_UINT32 cRecord; JET_COLUMNID columnidPresentationOrder; JET_COLUMNID columnidcolumnname; JET_COLUMNID columnidcolumnid; @@ -1055,47 +1074,47 @@ typedef struct typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_COLUMNID columnid; JET_COLTYP coltyp; - unsigned short wCountry; // sepcifies the country/region for the column definition - unsigned short langid; - unsigned short cp; - unsigned short wCollate; /* Must be 0 */ - unsigned long cbMax; + JET_UINT16 wCountry; // specifies the country/region for the column definition + JET_UINT16 langid; + JET_UINT16 cp; + JET_UINT16 wCollate; // Must be 0 + JET_UINT32 cbMax; JET_GRBIT grbit; } JET_COLUMNDEF; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_COLUMNID columnid; JET_COLTYP coltyp; - unsigned short wCountry; // specifies the columnid for the country/region field - unsigned short langid; - unsigned short cp; - unsigned short wFiller; /* Must be 0 */ - unsigned long cbMax; + JET_UINT16 wCountry; // specifies the columnid for the country/region field + JET_UINT16 langid; + JET_UINT16 cp; + JET_UINT16 wFiller; // Must be 0 + JET_UINT32 cbMax; JET_GRBIT grbit; - char szBaseTableName[256]; - char szBaseColumnName[256]; + JET_CHAR szBaseTableName[256]; + JET_CHAR szBaseColumnName[256]; } JET_COLUMNBASE_A; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_COLUMNID columnid; JET_COLTYP coltyp; - unsigned short wCountry; // specifies the columnid for the country/region field - unsigned short langid; - unsigned short cp; - unsigned short wFiller; /* Must be 0 */ - unsigned long cbMax; + JET_UINT16 wCountry; // specifies the columnid for the country/region field + JET_UINT16 langid; + JET_UINT16 cp; + JET_UINT16 wFiller; // Must be 0 + JET_UINT32 cbMax; JET_GRBIT grbit; - WCHAR szBaseTableName[256]; - WCHAR szBaseColumnName[256]; + JET_WCHAR szBaseTableName[256]; + JET_WCHAR szBaseColumnName[256]; } JET_COLUMNBASE_W; @@ -1108,9 +1127,9 @@ typedef struct typedef
struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_TABLEID tableid; - unsigned long cRecord; + JET_UINT32 cRecord; JET_COLUMNID columnidindexname; JET_COLUMNID columnidgrbitIndex; JET_COLUMNID columnidcKey; @@ -1134,28 +1153,28 @@ typedef struct typedef struct tag_JET_COLUMNCREATE_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szColumnName; // column name + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szColumnName; // column name JET_COLTYP coltyp; // column type - unsigned long cbMax; // the maximum length of this column (only relevant for binary and text columns) + JET_UINT32 cbMax; // the maximum length of this column (only relevant for binary and text columns) JET_GRBIT grbit; // column options - void *pvDefault; // default value (NULL if none) - unsigned long cbDefault; // length of default value - unsigned long cp; // code page (for text columns only) + JET_PVOID pvDefault; // default value (NULL if none) + JET_UINT32 cbDefault; // length of default value + JET_UINT32 cp; // code page (for text columns only) JET_COLUMNID columnid; // returned column id JET_ERR err; // returned error code } JET_COLUMNCREATE_A; typedef struct tag_JET_COLUMNCREATE_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szColumnName; // column name + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szColumnName; // column name JET_COLTYP coltyp; // column type - unsigned long cbMax; // the maximum length of this column (only relevant for binary and text columns) + JET_UINT32 cbMax; // the maximum length of this column (only relevant for binary and text columns) JET_GRBIT grbit; // column options - void *pvDefault; // default value (NULL if none) - unsigned long cbDefault; // length of default value - unsigned long cp; // code page (for text columns only) + JET_PVOID pvDefault; // default value (NULL if none) + JET_UINT32 
cbDefault; // length of default value + JET_UINT32 cp; // code page (for text columns only) JET_COLUMNID columnid; // returned column id JET_ERR err; // returned error code } JET_COLUMNCREATE_W; @@ -1172,18 +1191,18 @@ typedef struct tag_JET_COLUMNCREATE_W typedef struct tag_JET_USERDEFINEDDEFAULT_A { - char * szCallback; - unsigned char * pbUserData; - unsigned long cbUserData; - char * szDependantColumns; + JET_PSTR szCallback; + JET_BYTE * pbUserData; + JET_UINT32 cbUserData; + JET_PSTR szDependantColumns; } JET_USERDEFINEDDEFAULT_A; typedef struct tag_JET_USERDEFINEDDEFAULT_W { - WCHAR * szCallback; - unsigned char * pbUserData; - unsigned long cbUserData; - WCHAR * szDependantColumns; + JET_PWSTR szCallback; + JET_BYTE * pbUserData; + JET_UINT32 cbUserData; + JET_PWSTR szDependantColumns; } JET_USERDEFINEDDEFAULT_W; #ifdef JET_UNICODE @@ -1196,15 +1215,15 @@ typedef struct tag_JET_USERDEFINEDDEFAULT_W typedef struct tagJET_CONDITIONALCOLUMN_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szColumnName; // column that we are conditionally indexed on + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szColumnName; // column that we are conditionally indexed on JET_GRBIT grbit; // conditional column options } JET_CONDITIONALCOLUMN_A; typedef struct tagJET_CONDITIONALCOLUMN_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szColumnName; // column that we are conditionally indexed on + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szColumnName; // column that we are conditionally indexed on JET_GRBIT grbit; // conditional column options } JET_CONDITIONALCOLUMN_W; @@ -1216,27 +1235,27 @@ typedef struct tagJET_CONDITIONALCOLUMN_W typedef struct tagJET_UNICODEINDEX { - unsigned long lcid; - unsigned long dwMapFlags; + JET_UINT32 lcid; + JET_UINT32 dwMapFlags; } JET_UNICODEINDEX; #if ( JET_VERSION >= 0x0602 ) typedef struct 
tagJET_UNICODEINDEX2 { - _Field_z_ WCHAR *szLocaleName; - unsigned long dwMapFlags; + _Field_z_ JET_PWSTR szLocaleName; + JET_UINT32 dwMapFlags; } JET_UNICODEINDEX2; #endif //JET_VERSION >= 0x0602 #if ( JET_VERSION >= 0x0502 ) typedef struct tagJET_TUPLELIMITS { - unsigned long chLengthMin; - unsigned long chLengthMax; - unsigned long chToIndexMax; + JET_UINT32 chLengthMin; + JET_UINT32 chLengthMax; + JET_UINT32 chToIndexMax; #if ( JET_VERSION >= 0x0600 ) - unsigned long cchIncrement; - unsigned long ichStart; + JET_UINT32 cchIncrement; + JET_UINT32 ichStart; #endif // JET_VERSION >= 0x0600 } JET_TUPLELIMITS; #endif // JET_VERSION >= 0x0502 @@ -1246,9 +1265,9 @@ typedef struct tagJET_TUPLELIMITS // table, index, or the internal long values tree. typedef struct tagJET_SPACEHINTS { - unsigned long cbStruct; // size of this structure - unsigned long ulInitialDensity; // density at (append) layout. - unsigned long cbInitial; // initial size (in bytes). + JET_UINT32 cbStruct; // size of this structure + JET_UINT32 ulInitialDensity; // density at (append) layout. + JET_UINT32 cbInitial; // initial size (in bytes). JET_GRBIT grbit; // Combination of one or more flags from // JET_bitSpaceHints* flags @@ -1256,11 +1275,11 @@ typedef struct tagJET_SPACEHINTS // JET_bitRetrieveHints* flags // JET_bitUpdateHints* flags // JET_bitDeleteHints* flags - unsigned long ulMaintDensity; // density to maintain at. - unsigned long ulGrowth; // percent growth from: + JET_UINT32 ulMaintDensity; // density to maintain at. + JET_UINT32 ulGrowth; // percent growth from: // last growth or initial size (possibly rounded to nearest native JET allocation size). - unsigned long cbMinExtent; // This overrides ulGrowth if too small. - unsigned long cbMaxExtent; // This caps ulGrowth. + JET_UINT32 cbMinExtent; // This overrides ulGrowth if too small. + JET_UINT32 cbMaxExtent; // This caps ulGrowth. 
} JET_SPACEHINTS; #endif // JET_VERSION >= 0x0601 @@ -1270,58 +1289,58 @@ typedef struct tagJET_SPACEHINTS // was used (backward compatibility). typedef struct tagJET_INDEXCREATEOLD_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szIndexName; // index name - char *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szIndexName; // index name + JET_PSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density union { - unsigned long lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) - JET_UNICODEINDEX *pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) + JET_UINT32 lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) + JET_UNICODEINDEX * pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) }; union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) #if ( JET_VERSION >= 0x0502 ) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) #endif // ! 
JET_VERSION >= 0x0502 }; - JET_CONDITIONALCOLUMN_A *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code + JET_CONDITIONALCOLUMN_A * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code } JET_INDEXCREATEOLD_A; typedef struct tagJET_INDEXCREATEOLD_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szIndexName; // index name - WCHAR *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szIndexName; // index name + JET_PWSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density union { - unsigned long lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) - JET_UNICODEINDEX *pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) + JET_UINT32 lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) + JET_UNICODEINDEX * pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) }; union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) #if ( JET_VERSION >= 0x0502 ) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) #endif // ! 
JET_VERSION >= 0x0502 }; - JET_CONDITIONALCOLUMN_W *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code + JET_CONDITIONALCOLUMN_W * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code } JET_INDEXCREATEOLD_W; #ifdef JET_UNICODE @@ -1334,63 +1353,63 @@ typedef struct tagJET_INDEXCREATEOLD_W typedef struct tagJET_INDEXCREATE_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szIndexName; // index name - char *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szIndexName; // index name + JET_PSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density union { - unsigned long lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) - JET_UNICODEINDEX *pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) + JET_UINT32 lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) + JET_UNICODEINDEX * pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) }; union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) #if ( JET_VERSION >= 0x0502 ) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if 
JET_bitIndexTupleLimits specified) #endif // ! JET_VERSION >= 0x0502 }; - JET_CONDITIONALCOLUMN_A *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code + JET_CONDITIONALCOLUMN_A * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code #if ( JET_VERSION >= 0x0600 ) - unsigned long cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) + JET_UINT32 cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) #endif // JET_VERSION >= 0x0600 } JET_INDEXCREATE_A; typedef struct tagJET_INDEXCREATE_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szIndexName; // index name - WCHAR *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szIndexName; // index name + JET_PWSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density union { - unsigned long lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) - JET_UNICODEINDEX *pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) + JET_UINT32 lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) + JET_UNICODEINDEX * pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) }; union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in 
index key (if JET_bitIndexTupleLimits not specified) #if ( JET_VERSION >= 0x0502 ) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) #endif // ! JET_VERSION >= 0x0502 }; - JET_CONDITIONALCOLUMN_W *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code + JET_CONDITIONALCOLUMN_W * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code #if ( JET_VERSION >= 0x0600 ) - unsigned long cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) + JET_UINT32 cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) #endif // JET_VERSION >= 0x0600 } JET_INDEXCREATE_W; @@ -1404,58 +1423,58 @@ typedef struct tagJET_INDEXCREATE_W typedef struct tagJET_INDEXCREATE2_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szIndexName; // index name - char *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szIndexName; // index name + JET_PSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density union { - unsigned long lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) - JET_UNICODEINDEX *pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) + JET_UINT32 lcid; // lcid for the index (if JET_bitIndexUnicode NOT 
specified) + JET_UNICODEINDEX * pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) }; union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) }; - JET_CONDITIONALCOLUMN_A *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code - unsigned long cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) - JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints + JET_CONDITIONALCOLUMN_A * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code + JET_UINT32 cbKeyMost; // size of key preserved in index, e.g. 
without truncation (if JET_bitIndexKeyMost specified) + JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints } JET_INDEXCREATE2_A; typedef struct tagJET_INDEXCREATE2_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szIndexName; // index name - WCHAR *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szIndexName; // index name + JET_PWSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density union { - unsigned long lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) - JET_UNICODEINDEX *pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) + JET_UINT32 lcid; // lcid for the index (if JET_bitIndexUnicode NOT specified) + JET_UNICODEINDEX * pidxunicode; // pointer to JET_UNICODEINDEX struct (if JET_bitIndexUnicode specified) }; union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) }; - JET_CONDITIONALCOLUMN_W *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code - unsigned long cbKeyMost; // size of key preserved in index, e.g. 
without truncation (if JET_bitIndexKeyMost specified) - JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints + JET_CONDITIONALCOLUMN_W * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code + JET_UINT32 cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) + JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints } JET_INDEXCREATE2_W; #ifdef JET_UNICODE @@ -1469,48 +1488,48 @@ typedef struct tagJET_INDEXCREATE2_W typedef struct tagJET_INDEXCREATE3_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szIndexName; // index name - char *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density - JET_UNICODEINDEX2 *pidxunicode; // pointer to JET_UNICODEINDEX2 struct (if JET_bitIndexUnicode specified) + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szIndexName; // index name + JET_PSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density + JET_UNICODEINDEX2 * pidxunicode; // pointer to JET_UNICODEINDEX2 struct (if JET_bitIndexUnicode specified) union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_TUPLELIMITS * ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) }; - JET_CONDITIONALCOLUMN_A *rgconditionalcolumn; // 
pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code - unsigned long cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) - JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints + JET_CONDITIONALCOLUMN_A * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code + JET_UINT32 cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) + JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints } JET_INDEXCREATE3_A; typedef struct tagJET_INDEXCREATE3_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szIndexName; // index name - WCHAR *szKey; // index key definition - unsigned long cbKey; // size of key definition in szKey - JET_GRBIT grbit; // index options - unsigned long ulDensity; // index density - JET_UNICODEINDEX2 *pidxunicode; // pointer to JET_UNICODEINDEX2 struct (if JET_bitIndexUnicode specified) + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szIndexName; // index name + JET_PWSTR szKey; // index key definition + JET_UINT32 cbKey; // size of key definition in szKey + JET_GRBIT grbit; // index options + JET_UINT32 ulDensity; // index density + JET_UNICODEINDEX2 * pidxunicode; // pointer to JET_UNICODEINDEX2 struct (if JET_bitIndexUnicode specified) union { - unsigned long cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) - JET_TUPLELIMITS *ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) + JET_UINT32 cbVarSegMac; // maximum length of variable length columns in index key (if JET_bitIndexTupleLimits not specified) + JET_TUPLELIMITS * 
ptuplelimits; // pointer to JET_TUPLELIMITS struct (if JET_bitIndexTupleLimits specified) }; - JET_CONDITIONALCOLUMN_W *rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns - JET_ERR err; // returned error code - unsigned long cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) - JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints + JET_CONDITIONALCOLUMN_W * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns + JET_ERR err; // returned error code + JET_UINT32 cbKeyMost; // size of key preserved in index, e.g. without truncation (if JET_bitIndexKeyMost specified) + JET_SPACEHINTS * pSpacehints; // space allocation, maintenance, and usage hints } JET_INDEXCREATE3_W; #ifdef JET_UNICODE @@ -1526,34 +1545,34 @@ typedef struct tagJET_INDEXCREATE3_W typedef struct tagJET_TABLECREATE_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szTableName; // name of table to create. - char *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_A *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE_A *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - JET_GRBIT grbit; - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szTableName; // name of table to create. 
+ JET_PSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_A * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE_A * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_GRBIT grbit; + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes). } JET_TABLECREATE_A; typedef struct tagJET_TABLECREATE_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szTableName; // name of table to create. - WCHAR *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_W *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE_W *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - JET_GRBIT grbit; - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szTableName; // name of table to create. + JET_PWSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_W * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE_W * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_GRBIT grbit; + JET_TABLEID tableid; // returned tableid. 
+ JET_UINT32 cCreated; // count of objects created (columns+table+indexes). } JET_TABLECREATE_W; #ifdef JET_UNICODE @@ -1565,38 +1584,38 @@ typedef struct tagJET_TABLECREATE_W #if ( JET_VERSION >= 0x0501 ) typedef struct tagJET_TABLECREATE2_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szTableName; // name of table to create. - char *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_A *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE_A *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - char *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szTableName; // name of table to create. + JET_PSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_A * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE_A * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). 
} JET_TABLECREATE2_A; typedef struct tagJET_TABLECREATE2_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szTableName; // name of table to create. - WCHAR *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_W *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE_W *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - WCHAR *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szTableName; // name of table to create. + JET_PWSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_W * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE_W * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PWSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). 
} JET_TABLECREATE2_W; #ifdef JET_UNICODE @@ -1611,45 +1630,45 @@ typedef struct tagJET_TABLECREATE2_W #if ( JET_VERSION >= 0x0601 ) typedef struct tagJET_TABLECREATE3_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szTableName; // name of table to create. - char *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_A *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE2_A *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - char *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index - JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. - unsigned long cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record - - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szTableName; // name of table to create. + JET_PSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. 
+ JET_COLUMNCREATE_A * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE2_A * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index + JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. + JET_UINT32 cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record + + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). } JET_TABLECREATE3_A; typedef struct tagJET_TABLECREATE3_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szTableName; // name of table to create. - WCHAR *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_W *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE2_W *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - WCHAR *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index - JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. - unsigned long cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record - JET_TABLEID tableid; // returned tableid. 
- unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szTableName; // name of table to create. + JET_PWSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_W * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE2_W * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PWSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index + JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. + JET_UINT32 cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). } JET_TABLECREATE3_W; #ifdef JET_UNICODE @@ -1663,46 +1682,46 @@ typedef struct tagJET_TABLECREATE3_W #if ( JET_VERSION >= 0x0602 ) typedef struct tagJET_TABLECREATE4_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szTableName; // name of table to create. - char *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. 
- JET_COLUMNCREATE_A *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE3_A *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - char *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index - JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. - unsigned long cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record - - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szTableName; // name of table to create. + JET_PSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_A * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE3_A * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index + JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. + JET_UINT32 cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record + + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). 
} JET_TABLECREATE4_A; typedef struct tagJET_TABLECREATE4_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szTableName; // name of table to create. - WCHAR *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_W *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE3_W *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - WCHAR *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index - JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. - unsigned long cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record - - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szTableName; // name of table to create. + JET_PWSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. 
+ JET_COLUMNCREATE_W * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE3_W * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PWSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index + JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. + JET_UINT32 cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record + + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). } JET_TABLECREATE4_W; #ifdef JET_UNICODE @@ -1717,48 +1736,48 @@ typedef struct tagJET_TABLECREATE4_W #if ( JET_VERSION >= 0x0A01 ) typedef struct tagJET_TABLECREATE5_A { - unsigned long cbStruct; // size of this structure (for future expansion) - char *szTableName; // name of table to create. - char *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. - JET_COLUMNCREATE_A *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE3_A *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - char *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index - JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. 
- unsigned long cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record - unsigned long cbLVChunkMax; // Maximum chunk size to use for Separated LVs - - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PSTR szTableName; // name of table to create. + JET_PSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_A * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE3_A * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index + JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. + JET_UINT32 cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record + JET_UINT32 cbLVChunkMax; // Maximum chunk size to use for Separated LVs + + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). } JET_TABLECREATE5_A; typedef struct tagJET_TABLECREATE5_W { - unsigned long cbStruct; // size of this structure (for future expansion) - WCHAR *szTableName; // name of table to create. - WCHAR *szTemplateTableName; // name of table from which to inherit base DDL - unsigned long ulPages; // initial pages to allocate for table. - unsigned long ulDensity; // table density. 
- JET_COLUMNCREATE_W *rgcolumncreate; // array of column creation info - unsigned long cColumns; // number of columns to create - JET_INDEXCREATE3_W *rgindexcreate; // array of index creation info - unsigned long cIndexes; // number of indexes to create - WCHAR *szCallback; // callback to use for this table - JET_CBTYP cbtyp; // when the callback should be called - JET_GRBIT grbit; - JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index - JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. - unsigned long cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record - unsigned long cbLVChunkMax; // Maximum chunk size to use for Separated LVs - - JET_TABLEID tableid; // returned tableid. - unsigned long cCreated; // count of objects created (columns+table+indexes+callbacks). + JET_UINT32 cbStruct; // size of this structure (for future expansion) + JET_PWSTR szTableName; // name of table to create. + JET_PWSTR szTemplateTableName; // name of table from which to inherit base DDL + JET_UINT32 ulPages; // initial pages to allocate for table. + JET_UINT32 ulDensity; // table density. + JET_COLUMNCREATE_W * rgcolumncreate; // array of column creation info + JET_UINT32 cColumns; // number of columns to create + JET_INDEXCREATE3_W * rgindexcreate; // array of index creation info + JET_UINT32 cIndexes; // number of indexes to create + JET_PWSTR szCallback; // callback to use for this table + JET_CBTYP cbtyp; // when the callback should be called + JET_GRBIT grbit; + JET_SPACEHINTS * pSeqSpacehints; // space allocation, maintenance, and usage hints for default sequential index + JET_SPACEHINTS * pLVSpacehints; // space allocation, maintenance, and usage hints for Separated LV tree. 
+ JET_UINT32 cbSeparateLV; // heuristic size to separate a intrinsic LV from the primary record + JET_UINT32 cbLVChunkMax; // Maximum chunk size to use for Separated LVs + + JET_TABLEID tableid; // returned tableid. + JET_UINT32 cCreated; // count of objects created (columns+table+indexes+callbacks). } JET_TABLECREATE5_W; #ifdef JET_UNICODE @@ -1773,54 +1792,54 @@ typedef struct tagJET_TABLECREATE5_W #if ( JET_VERSION >= 0x0600 ) typedef struct tagJET_OPENTEMPORARYTABLE { - unsigned long cbStruct; // size of this structure (for future expansion) - const JET_COLUMNDEF *prgcolumndef; - unsigned long ccolumn; - JET_UNICODEINDEX *pidxunicode; - JET_GRBIT grbit; - JET_COLUMNID *prgcolumnid; - unsigned long cbKeyMost; - unsigned long cbVarSegMac; - JET_TABLEID tableid; + JET_UINT32 cbStruct; // size of this structure (for future expansion) + const JET_COLUMNDEF * prgcolumndef; + JET_UINT32 ccolumn; + JET_UNICODEINDEX * pidxunicode; + JET_GRBIT grbit; + JET_COLUMNID * prgcolumnid; + JET_UINT32 cbKeyMost; + JET_UINT32 cbVarSegMac; + JET_TABLEID tableid; } JET_OPENTEMPORARYTABLE; #endif // JET_VERSION >= 0x0600 #if ( JET_VERSION >= 0x0602 ) typedef struct tagJET_OPENTEMPORARYTABLE2 { - unsigned long cbStruct; // size of this structure (for future expansion) - const JET_COLUMNDEF *prgcolumndef; - unsigned long ccolumn; - JET_UNICODEINDEX2 *pidxunicode; - JET_GRBIT grbit; - JET_COLUMNID *prgcolumnid; - unsigned long cbKeyMost; - unsigned long cbVarSegMac; - JET_TABLEID tableid; + JET_UINT32 cbStruct; // size of this structure (for future expansion) + const JET_COLUMNDEF * prgcolumndef; + JET_UINT32 ccolumn; + JET_UNICODEINDEX2 * pidxunicode; + JET_GRBIT grbit; + JET_COLUMNID * prgcolumnid; + JET_UINT32 cbKeyMost; + JET_UINT32 cbVarSegMac; + JET_TABLEID tableid; } JET_OPENTEMPORARYTABLE2; #endif // JET_VERSION >= 0x0602 typedef struct { - unsigned long cbStruct; - unsigned long ibLongValue; - unsigned long itagSequence; + JET_UINT32 cbStruct; + JET_UINT32 ibLongValue; + 
JET_UINT32 itagSequence; JET_COLUMNID columnidNextTagged; } JET_RETINFO; typedef struct { - unsigned long cbStruct; - unsigned long ibLongValue; - unsigned long itagSequence; + JET_UINT32 cbStruct; + JET_UINT32 ibLongValue; + JET_UINT32 itagSequence; } JET_SETINFO; typedef struct { - unsigned long cbStruct; - unsigned long centriesLT; - unsigned long centriesInRange; - unsigned long centriesTotal; + JET_UINT32 cbStruct; + JET_UINT32 centriesLT; + JET_UINT32 centriesInRange; + JET_UINT32 centriesTotal; } JET_RECPOS; // On input to JetGotoPosition, centriesLTDeprecated and centriesTotalDeprecated must be 0. @@ -1828,25 +1847,25 @@ typedef struct // hold potentially truncated versions of centriesLT and centriesTotal. typedef struct { - unsigned long cbStruct; - unsigned long centriesLTDeprecated; - unsigned long centriesInRangeDeprecated; - unsigned long centriesTotalDeprecated; - unsigned long long centriesLT; - unsigned long long centriesTotal; + JET_UINT32 cbStruct; + JET_UINT32 centriesLTDeprecated; + JET_UINT32 centriesInRangeDeprecated; + JET_UINT32 centriesTotalDeprecated; + JET_UINT64 centriesLT; + JET_UINT64 centriesTotal; } JET_RECPOS2; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_TABLEID tableid; - unsigned long cRecord; + JET_UINT32 cRecord; JET_COLUMNID columnidBookmark; } JET_RECORDLIST; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_TABLEID tableid; JET_GRBIT grbit; } JET_INDEXRANGE; @@ -1872,17 +1891,17 @@ typedef struct { JET_COLUMNID columnid; // columnid of the column JET_RELOP relop; // relational operator - void * pv; // pointer to the value to use - unsigned long cb; // size of the value to use + JET_PVOID pv; // pointer to the value to use + JET_UINT32 cb; // size of the value to use JET_GRBIT grbit; // optional grbits } JET_INDEX_COLUMN; typedef struct { JET_INDEX_COLUMN * rgStartColumns; - unsigned long cStartColumns; + JET_UINT32 cStartColumns; JET_INDEX_COLUMN * rgEndColumns; - unsigned long 
cEndColumns; + JET_UINT32 cEndColumns; } JET_INDEX_RANGE; #endif // JET_VERSION >= 0x0602 @@ -1905,15 +1924,15 @@ typedef enum #if ( JET_VERSION >= 0x0501 ) typedef struct tagDDLADDCALLBACK_A { - char *szTable; - char *szCallback; + JET_PSTR szTable; + JET_PSTR szCallback; JET_CBTYP cbtyp; } JET_DDLADDCALLBACK_A; typedef struct tagDDLADDCALLBACK_W { - WCHAR *szTable; - WCHAR *szCallback; + JET_PWSTR szTable; + JET_PWSTR szCallback; JET_CBTYP cbtyp; } JET_DDLADDCALLBACK_W; @@ -1926,16 +1945,16 @@ typedef struct tagDDLADDCALLBACK_W typedef struct tagDDLCHANGECOLUMN_A { - char *szTable; - char *szColumn; + JET_PSTR szTable; + JET_PSTR szColumn; JET_COLTYP coltypNew; JET_GRBIT grbitNew; } JET_DDLCHANGECOLUMN_A; typedef struct tagDDLCHANGECOLUMN_W { - WCHAR *szTable; - WCHAR *szColumn; + JET_PWSTR szTable; + JET_PWSTR szColumn; JET_COLTYP coltypNew; JET_GRBIT grbitNew; } JET_DDLCHANGECOLUMN_W; @@ -1950,16 +1969,16 @@ typedef struct tagDDLCHANGECOLUMN_W typedef struct tagDDLMAXCOLUMNSIZE_A { - char *szTable; - char *szColumn; - unsigned long cbMax; + JET_PSTR szTable; + JET_PSTR szColumn; + JET_UINT32 cbMax; } JET_DDLMAXCOLUMNSIZE_A; typedef struct tagDDLMAXCOLUMNSIZE_W { - WCHAR *szTable; - WCHAR *szColumn; - unsigned long cbMax; + JET_PWSTR szTable; + JET_PWSTR szColumn; + JET_UINT32 cbMax; } JET_DDLMAXCOLUMNSIZE_W; #ifdef JET_UNICODE @@ -1970,16 +1989,16 @@ typedef struct tagDDLMAXCOLUMNSIZE_W typedef struct tagDDLADDCONDITIONALCOLUMNSTOALLINDEXES_A { - char * szTable; // name of table to convert - JET_CONDITIONALCOLUMN_A * rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns + JET_PSTR szTable; // name of table to convert + JET_CONDITIONALCOLUMN_A * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns } JET_DDLADDCONDITIONALCOLUMNSTOALLINDEXES_A; typedef struct tagDDLADDCONDITIONALCOLUMNSTOALLINDEXES_W { - WCHAR 
* szTable; // name of table to convert - JET_CONDITIONALCOLUMN_W * rgconditionalcolumn; // pointer to conditional column structure - unsigned long cConditionalColumn; // number of conditional columns + JET_PWSTR szTable; // name of table to convert + JET_CONDITIONALCOLUMN_W * rgconditionalcolumn; // pointer to conditional column structure + JET_UINT32 cConditionalColumn; // number of conditional columns } JET_DDLADDCONDITIONALCOLUMNSTOALLINDEXES_W; #ifdef JET_UNICODE @@ -1991,20 +2010,20 @@ typedef struct tagDDLADDCONDITIONALCOLUMNSTOALLINDEXES_W typedef struct tagDDLADDCOLUMCALLBACK_A { - char *szTable; - char *szColumn; - char *szCallback; - void *pvCallbackData; - unsigned long cbCallbackData; + JET_PSTR szTable; + JET_PSTR szColumn; + JET_PSTR szCallback; + JET_PVOID pvCallbackData; + JET_UINT32 cbCallbackData; } JET_DDLADDCOLUMNCALLBACK_A; typedef struct tagDDLADDCOLUMCALLBACK_W { - WCHAR *szTable; - WCHAR *szColumn; - WCHAR *szCallback; - void *pvCallbackData; - unsigned long cbCallbackData; + JET_PWSTR szTable; + JET_PWSTR szColumn; + JET_PWSTR szCallback; + JET_PVOID pvCallbackData; + JET_UINT32 cbCallbackData; } JET_DDLADDCOLUMNCALLBACK_W; #ifdef JET_UNICODE @@ -2015,16 +2034,16 @@ typedef struct tagDDLADDCOLUMCALLBACK_W typedef struct tagDDLINDEXDENSITY_A { - char *szTable; - char *szIndex; // pass NULL to change density of primary index - unsigned long ulDensity; + JET_PSTR szTable; + JET_PSTR szIndex; // pass NULL to change density of primary index + JET_UINT32 ulDensity; } JET_DDLINDEXDENSITY_A; typedef struct tagDDLINDEXDENSITY_W { - WCHAR *szTable; - WCHAR *szIndex; // pass NULL to change density of primary index - unsigned long ulDensity; + JET_PWSTR szTable; + JET_PWSTR szIndex; // pass NULL to change density of primary index + JET_UINT32 ulDensity; } JET_DDLINDEXDENSITY_W; #ifdef JET_UNICODE @@ -2035,14 +2054,14 @@ typedef struct tagDDLINDEXDENSITY_W typedef struct tagDDLCALLBACKDLL_A { - char *szOldDLL; - char *szNewDLL; + JET_PSTR szOldDLL; + 
JET_PSTR szNewDLL; } JET_DDLCALLBACKDLL_A; typedef struct tagDDLCALLBACKDLL_W { - WCHAR *szOldDLL; - WCHAR *szNewDLL; + JET_PWSTR szOldDLL; + JET_PWSTR szNewDLL; } JET_DDLCALLBACKDLL_W; #ifdef JET_UNICODE @@ -2056,11 +2075,11 @@ typedef struct tagDDLCALLBACKDLL_W typedef struct { - void *pvReserved1; // internally use - void *pvReserved2; - unsigned long cbActual; // the actual number of bytes read through this IO + JET_PVOID pvReserved1; // internally use + JET_PVOID pvReserved2; + JET_UINT32 cbActual; // the actual number of bytes read through this IO JET_HANDLE hSig; // a manual reset signal to wait for the IO to complete. - JET_ERR err; // Err code for this assync IO. + JET_ERR err; // Err code for this assync IO. } JET_OLP; // begin_PubEsent @@ -2070,29 +2089,29 @@ typedef struct typedef struct { - char bSeconds; // 0 - 59 - char bMinutes; // 0 - 59 - char bHours; // 0 - 23 - char bDay; // 1 - 31 - char bMonth; // 1 - 12 - char bYear; // current year - 1900 + JET_INT8 bSeconds; // 0 - 59 + JET_INT8 bMinutes; // 0 - 59 + JET_INT8 bHours; // 0 - 23 + JET_INT8 bDay; // 1 - 31 + JET_INT8 bMonth; // 1 - 12 + JET_INT8 bYear; // current year - 1900 union { - char bFiller1; + JET_BYTE bFiller1; struct { - unsigned char fTimeIsUTC:1; - unsigned char bMillisecondsLow:7; + JET_BYTE fTimeIsUTC:1; + JET_BYTE bMillisecondsLow:7; }; }; union { - char bFiller2; + JET_BYTE bFiller2; struct { - unsigned char fReserved:1; - unsigned char bMillisecondsHigh:3; - unsigned char fUnused:4; + JET_BYTE fReserved:1; + JET_BYTE bMillisecondsHigh:3; + JET_BYTE fUnused:4; }; }; } JET_LOGTIME; @@ -2103,29 +2122,29 @@ typedef struct // compatibility reasons typedef struct { - char bSeconds; // 0 - 59 - char bMinutes; // 0 - 59 - char bHours; // 0 - 23 - char bDay; // 1 - 31 - char bMonth; // 1 - 12 - char bYear; // current year - 1900 + JET_INT8 bSeconds; // 0 - 59 + JET_INT8 bMinutes; // 0 - 59 + JET_INT8 bHours; // 0 - 23 + JET_INT8 bDay; // 1 - 31 + JET_INT8 bMonth; // 1 - 12 + JET_INT8 
bYear; // current year - 1900 union { - char bFiller1; + JET_BYTE bFiller1; struct { - unsigned char fTimeIsUTC:1; - unsigned char bMillisecondsLow:7; + JET_BYTE fTimeIsUTC:1; + JET_BYTE bMillisecondsLow:7; }; }; union { - char bFiller2; + JET_BYTE bFiller2; struct { - unsigned char fOSSnapshot:1; - unsigned char bMillisecondsHigh:3; - unsigned char fReserved:4; + JET_BYTE fOSSnapshot:1; + JET_BYTE bMillisecondsHigh:3; + JET_BYTE fReserved:4; }; }; } JET_BKLOGTIME; @@ -2133,24 +2152,24 @@ typedef struct typedef struct { - unsigned short ib; // must be the last so that lgpos can - unsigned short isec; // index of disksec starting logsec - long lGeneration; // generation of logsec + JET_UINT16 ib; // must be the last so that lgpos can + JET_UINT16 isec; // index of disksec starting logsec + JET_INT32 lGeneration; // generation of logsec } JET_LGPOS; // be casted to TIME. typedef struct { - unsigned long ulRandom; // a random number + JET_UINT32 ulRandom; // a random number JET_LOGTIME logtimeCreate; // time db created, in logtime format - char szComputerName[ JET_MAX_COMPUTERNAME_LENGTH + 1 ]; // where db is created + JET_CHAR szComputerName[ JET_MAX_COMPUTERNAME_LENGTH + 1 ]; // where db is created } JET_SIGNATURE; // end_PubEsent #if ( JET_VERSION >= 0x0600 ) typedef struct { - unsigned long genMin; - unsigned long genMax; + JET_UINT32 genMin; + JET_UINT32 genMax; JET_LOGTIME logtimeGenMaxCreate; } JET_CHECKPOINTINFO; #endif // JET_VERSION >= 0x0600 @@ -2166,19 +2185,19 @@ typedef struct JET_BKLOGTIME bklogtimeMark; #endif // JET_VERSION >= 0x0600 }; - unsigned long genLow; - unsigned long genHigh; + JET_UINT32 genLow; + JET_UINT32 genHigh; } JET_BKINFO; #include typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. 
- unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. + JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2197,29 +2216,29 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. - unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) } JET_DBINFOMISC; #if ( JET_VERSION >= 0x0600 ) typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. - unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. 
+ JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2238,39 +2257,39 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. - unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) // new fields added on top of the above JET_DBINFOMISC - unsigned long genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation - unsigned long genMaxRequired; // the maximum log generation required for replaying the logs. + JET_UINT32 genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation + JET_UINT32 genMaxRequired; // the maximum log generation required for replaying the logs. 
JET_LOGTIME logtimeGenMaxCreate; // creation time of the genMax log file - unsigned long ulRepairCount; // number of times repair has been called on this database + JET_UINT32 ulRepairCount; // number of times repair has been called on this database JET_LOGTIME logtimeRepair; // the date of the last time that repair was run - unsigned long ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag + JET_UINT32 ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag - unsigned long ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page + JET_UINT32 ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page JET_LOGTIME logtimeECCFixSuccess; // the date of the last time that a one bit error was fixed and resulted in a good page - unsigned long ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair + JET_UINT32 ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair - unsigned long ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page + JET_UINT32 ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page JET_LOGTIME logtimeECCFixFail; // the date of the last time that a one bit error was fixed and resulted in a bad page - unsigned long ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair + JET_UINT32 ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair - unsigned long ulBadChecksum; // number of times a non-correctable ECC/checksum error was found + JET_UINT32 ulBadChecksum; // number of times a non-correctable ECC/checksum error was found JET_LOGTIME logtimeBadChecksum; // the date of the last time that a 
non-correctable ECC/checksum error was found - unsigned long ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair + JET_UINT32 ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair } JET_DBINFOMISC2; #endif // JET_VERSION >= 0x0600 @@ -2278,11 +2297,11 @@ typedef struct #if ( JET_VERSION >= 0x0601 ) typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. - unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. + JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2301,52 +2320,52 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. 
- unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) // new fields added on top of the above JET_DBINFOMISC - unsigned long genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation - unsigned long genMaxRequired; // the maximum log generation required for replaying the logs. + JET_UINT32 genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation + JET_UINT32 genMaxRequired; // the maximum log generation required for replaying the logs. JET_LOGTIME logtimeGenMaxCreate; // creation time of the genMax log file - unsigned long ulRepairCount; // number of times repair has been called on this database + JET_UINT32 ulRepairCount; // number of times repair has been called on this database JET_LOGTIME logtimeRepair; // the date of the last time that repair was run - unsigned long ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag + JET_UINT32 ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag - unsigned long ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page + JET_UINT32 ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page JET_LOGTIME logtimeECCFixSuccess; // the date of the last time that a one bit error was fixed and resulted in a good page - unsigned long ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair + JET_UINT32 ulECCFixSuccessOld; // number of times a one 
bit error was fixed and resulted in a good page before last repair - unsigned long ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page + JET_UINT32 ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page JET_LOGTIME logtimeECCFixFail; // the date of the last time that a one bit error was fixed and resulted in a bad page - unsigned long ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair + JET_UINT32 ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair - unsigned long ulBadChecksum; // number of times a non-correctable ECC/checksum error was found + JET_UINT32 ulBadChecksum; // number of times a non-correctable ECC/checksum error was found JET_LOGTIME logtimeBadChecksum; // the date of the last time that a non-correctable ECC/checksum error was found - unsigned long ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair + JET_UINT32 ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair // new fields added on top of the above JET_DBINFOMISC2 - unsigned long genCommitted; // the maximum log generation committed to the database. Typically the current log generation + JET_UINT32 genCommitted; // the maximum log generation committed to the database. Typically the current log generation } JET_DBINFOMISC3; typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. - unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. + JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. 
JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2365,42 +2384,42 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. - unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) // new fields added on top of the above JET_DBINFOMISC - unsigned long genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation - unsigned long genMaxRequired; // the maximum log generation required for replaying the logs. + JET_UINT32 genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation + JET_UINT32 genMaxRequired; // the maximum log generation required for replaying the logs. 
JET_LOGTIME logtimeGenMaxCreate; // creation time of the genMax log file - unsigned long ulRepairCount; // number of times repair has been called on this database + JET_UINT32 ulRepairCount; // number of times repair has been called on this database JET_LOGTIME logtimeRepair; // the date of the last time that repair was run - unsigned long ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag + JET_UINT32 ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag - unsigned long ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page + JET_UINT32 ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page JET_LOGTIME logtimeECCFixSuccess; // the date of the last time that a one bit error was fixed and resulted in a good page - unsigned long ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair + JET_UINT32 ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair - unsigned long ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page + JET_UINT32 ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page JET_LOGTIME logtimeECCFixFail; // the date of the last time that a one bit error was fixed and resulted in a bad page - unsigned long ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair + JET_UINT32 ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair - unsigned long ulBadChecksum; // number of times a non-correctable ECC/checksum error was found + JET_UINT32 ulBadChecksum; // number of times a non-correctable ECC/checksum error was found JET_LOGTIME logtimeBadChecksum; // the date of the last time that a 
non-correctable ECC/checksum error was found - unsigned long ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair + JET_UINT32 ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair // new fields added on top of the above JET_DBINFOMISC2 - unsigned long genCommitted; // the maximum log generation committed to the database. Typically the current log generation + JET_UINT32 genCommitted; // the maximum log generation committed to the database. Typically the current log generation // new fields added on top of the above JET_DBINFOMISC3 JET_BKINFO bkinfoCopyPrev; // Last successful Copy backup @@ -2412,11 +2431,11 @@ typedef struct #if ( JET_VERSION >= 0x0601 ) typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. - unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. + JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2435,64 +2454,64 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. 
- unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) // new fields added on top of the above JET_DBINFOMISC - unsigned long genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation - unsigned long genMaxRequired; // the maximum log generation required for replaying the logs. + JET_UINT32 genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation + JET_UINT32 genMaxRequired; // the maximum log generation required for replaying the logs. JET_LOGTIME logtimeGenMaxCreate; // creation time of the genMax log file - unsigned long ulRepairCount; // number of times repair has been called on this database + JET_UINT32 ulRepairCount; // number of times repair has been called on this database JET_LOGTIME logtimeRepair; // the date of the last time that repair was run - unsigned long ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag + JET_UINT32 ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag - unsigned long ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page + JET_UINT32 ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page JET_LOGTIME logtimeECCFixSuccess; // the date of the last time that a one bit error was fixed and resulted in a good page - unsigned long ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair + JET_UINT32 ulECCFixSuccessOld; // number of times a one 
bit error was fixed and resulted in a good page before last repair - unsigned long ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page + JET_UINT32 ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page JET_LOGTIME logtimeECCFixFail; // the date of the last time that a one bit error was fixed and resulted in a bad page - unsigned long ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair + JET_UINT32 ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair - unsigned long ulBadChecksum; // number of times a non-correctable ECC/checksum error was found + JET_UINT32 ulBadChecksum; // number of times a non-correctable ECC/checksum error was found JET_LOGTIME logtimeBadChecksum; // the date of the last time that a non-correctable ECC/checksum error was found - unsigned long ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair + JET_UINT32 ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair // new fields added on top of the above JET_DBINFOMISC2 - unsigned long genCommitted; // the maximum log generation committed to the database. Typically the current log generation + JET_UINT32 genCommitted; // the maximum log generation committed to the database. 
Typically the current log generation // new fields added on top of the above JET_DBINFOMISC3 JET_BKINFO bkinfoCopyPrev; // Last successful Copy backup JET_BKINFO bkinfoDiffPrev; // Last successful Differential backup, reset when bkinfoFullPrev is set // new fields added on top of the above JET_DBINFOMISC4 - unsigned long ulIncrementalReseedCount; // number of times incremental reseed has been initiated on this database + JET_UINT32 ulIncrementalReseedCount; // number of times incremental reseed has been initiated on this database JET_LOGTIME logtimeIncrementalReseed; // the date of the last time that incremental reseed was initiated on this database - unsigned long ulIncrementalReseedCountOld; // number of times incremental reseed was initiated on this database before the last defrag + JET_UINT32 ulIncrementalReseedCountOld; // number of times incremental reseed was initiated on this database before the last defrag - unsigned long ulPagePatchCount; // number of pages patched in the database as a part of incremental reseed + JET_UINT32 ulPagePatchCount; // number of pages patched in the database as a part of incremental reseed JET_LOGTIME logtimePagePatch; // the date of the last time that a page was patched as a part of incremental reseed - unsigned long ulPagePatchCountOld; // number of pages patched in the database as a part of incremental reseed before the last defrag + JET_UINT32 ulPagePatchCountOld; // number of pages patched in the database as a part of incremental reseed before the last defrag } JET_DBINFOMISC5; typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. - unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. 
+ JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2511,71 +2530,71 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. - unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) // new fields added on top of the above JET_DBINFOMISC - unsigned long genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation - unsigned long genMaxRequired; // the maximum log generation required for replaying the logs. + JET_UINT32 genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation + JET_UINT32 genMaxRequired; // the maximum log generation required for replaying the logs. 
JET_LOGTIME logtimeGenMaxCreate; // creation time of the genMax log file - unsigned long ulRepairCount; // number of times repair has been called on this database + JET_UINT32 ulRepairCount; // number of times repair has been called on this database JET_LOGTIME logtimeRepair; // the date of the last time that repair was run - unsigned long ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag + JET_UINT32 ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag - unsigned long ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page + JET_UINT32 ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page JET_LOGTIME logtimeECCFixSuccess; // the date of the last time that a one bit error was fixed and resulted in a good page - unsigned long ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair + JET_UINT32 ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair - unsigned long ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page + JET_UINT32 ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page JET_LOGTIME logtimeECCFixFail; // the date of the last time that a one bit error was fixed and resulted in a bad page - unsigned long ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair + JET_UINT32 ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair - unsigned long ulBadChecksum; // number of times a non-correctable ECC/checksum error was found + JET_UINT32 ulBadChecksum; // number of times a non-correctable ECC/checksum error was found JET_LOGTIME logtimeBadChecksum; // the date of the last time that a 
non-correctable ECC/checksum error was found - unsigned long ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair + JET_UINT32 ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair // new fields added on top of the above JET_DBINFOMISC2 - unsigned long genCommitted; // the maximum log generation committed to the database. Typically the current log generation + JET_UINT32 genCommitted; // the maximum log generation committed to the database. Typically the current log generation // new fields added on top of the above JET_DBINFOMISC3 JET_BKINFO bkinfoCopyPrev; // Last successful Copy backup JET_BKINFO bkinfoDiffPrev; // Last successful Differential backup, reset when bkinfoFullPrev is set // new fields added on top of the above JET_DBINFOMISC4 - unsigned long ulIncrementalReseedCount; // number of times incremental reseed has been initiated on this database + JET_UINT32 ulIncrementalReseedCount; // number of times incremental reseed has been initiated on this database JET_LOGTIME logtimeIncrementalReseed; // the date of the last time that incremental reseed was initiated on this database - unsigned long ulIncrementalReseedCountOld; // number of times incremental reseed was initiated on this database before the last defrag + JET_UINT32 ulIncrementalReseedCountOld; // number of times incremental reseed was initiated on this database before the last defrag - unsigned long ulPagePatchCount; // number of pages patched in the database as a part of incremental reseed + JET_UINT32 ulPagePatchCount; // number of pages patched in the database as a part of incremental reseed JET_LOGTIME logtimePagePatch; // the date of the last time that a page was patched as a part of incremental reseed - unsigned long ulPagePatchCountOld; // number of pages patched in the database as a part of incremental reseed before the last defrag + JET_UINT32 ulPagePatchCountOld; // number of pages patched 
in the database as a part of incremental reseed before the last defrag // new fields added on top of the above JET_DBINFOMISC5 JET_LOGTIME logtimeChecksumPrev; // last checksum pass finish time (UTC - 1900y) JET_LOGTIME logtimeChecksumStart; // current checksum pass start time (UTC - 1900y) - unsigned long cpgDatabaseChecked; // # of page checked for current pass + JET_UINT32 cpgDatabaseChecked; // # of page checked for current pass } JET_DBINFOMISC6; #endif // JET_VERSION >= 0x0601 #if ( JET_VERSION >= 0x0A00 ) typedef struct { - unsigned long ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. - unsigned long ulUpdate; // used to track incremental database format "update (major)" version from the + JET_UINT32 ulVersion; // the major (incompatible) version of DAE from the last engine attach/create. + JET_UINT32 ulUpdate; // used to track incremental database format "update (major)" version from the // last attach/create that is a backward-compatible major update. JET_SIGNATURE signDb; // (28 bytes) signature of the db (incl. creation time). - unsigned long dbstate; // consistent/inconsistent state + JET_UINT32 dbstate; // consistent/inconsistent state JET_LGPOS lgposConsistent; // null if in inconsistent state JET_LOGTIME logtimeConsistent; // null if in inconsistent state @@ -2594,60 +2613,60 @@ typedef struct // Reset when bkinfoFullPrev is set JET_BKINFO bkinfoFullCur; // current backup. Succeed if a // corresponding pat file generated. - unsigned long fShadowingDisabled; - unsigned long fUpgradeDb; + JET_UINT32 fShadowingDisabled; + JET_UINT32 fUpgradeDb; // NT version information. This is needed to decide if an index need // be recreated due to sort table changes. 
- unsigned long dwMajorVersion; /* OS version info */ - unsigned long dwMinorVersion; - unsigned long dwBuildNumber; - long lSPNumber; + JET_UINT32 dwMajorVersion; // OS version info + JET_UINT32 dwMinorVersion; + JET_UINT32 dwBuildNumber; + JET_INT32 lSPNumber; - unsigned long cbPageSize; // database page size (0 = 4k pages) + JET_UINT32 cbPageSize; // database page size (0 = 4k pages) // new fields added on top of the above JET_DBINFOMISC - unsigned long genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation - unsigned long genMaxRequired; // the maximum log generation required for replaying the logs. + JET_UINT32 genMinRequired; // the minimum log generation required for replaying the logs. Typically the checkpoint generation + JET_UINT32 genMaxRequired; // the maximum log generation required for replaying the logs. JET_LOGTIME logtimeGenMaxCreate; // creation time of the genMax log file - unsigned long ulRepairCount; // number of times repair has been called on this database + JET_UINT32 ulRepairCount; // number of times repair has been called on this database JET_LOGTIME logtimeRepair; // the date of the last time that repair was run - unsigned long ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag + JET_UINT32 ulRepairCountOld; // number of times ErrREPAIRAttachForRepair has been called on this database before the last defrag - unsigned long ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page + JET_UINT32 ulECCFixSuccess; // number of times a one bit error was fixed and resulted in a good page JET_LOGTIME logtimeECCFixSuccess; // the date of the last time that a one bit error was fixed and resulted in a good page - unsigned long ulECCFixSuccessOld; // number of times a one bit error was fixed and resulted in a good page before last repair + JET_UINT32 ulECCFixSuccessOld; // number of times a one 
bit error was fixed and resulted in a good page before last repair - unsigned long ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page + JET_UINT32 ulECCFixFail; // number of times a one bit error was fixed and resulted in a bad page JET_LOGTIME logtimeECCFixFail; // the date of the last time that a one bit error was fixed and resulted in a bad page - unsigned long ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair + JET_UINT32 ulECCFixFailOld; // number of times a one bit error was fixed and resulted in a bad page before last repair - unsigned long ulBadChecksum; // number of times a non-correctable ECC/checksum error was found + JET_UINT32 ulBadChecksum; // number of times a non-correctable ECC/checksum error was found JET_LOGTIME logtimeBadChecksum; // the date of the last time that a non-correctable ECC/checksum error was found - unsigned long ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair + JET_UINT32 ulBadChecksumOld; // number of times a non-correctable ECC/checksum error was found before last repair // new fields added on top of the above JET_DBINFOMISC2 - unsigned long genCommitted; // the maximum log generation committed to the database. Typically the current log generation + JET_UINT32 genCommitted; // the maximum log generation committed to the database. 
Typically the current log generation // new fields added on top of the above JET_DBINFOMISC3 JET_BKINFO bkinfoCopyPrev; // Last successful Copy backup JET_BKINFO bkinfoDiffPrev; // Last successful Differential backup, reset when bkinfoFullPrev is set // new fields added on top of the above JET_DBINFOMISC4 - unsigned long ulIncrementalReseedCount; // number of times incremental reseed has been initiated on this database + JET_UINT32 ulIncrementalReseedCount; // number of times incremental reseed has been initiated on this database JET_LOGTIME logtimeIncrementalReseed; // the date of the last time that incremental reseed was initiated on this database - unsigned long ulIncrementalReseedCountOld; // number of times incremental reseed was initiated on this database before the last defrag + JET_UINT32 ulIncrementalReseedCountOld; // number of times incremental reseed was initiated on this database before the last defrag - unsigned long ulPagePatchCount; // number of pages patched in the database as a part of incremental reseed + JET_UINT32 ulPagePatchCount; // number of pages patched in the database as a part of incremental reseed JET_LOGTIME logtimePagePatch; // the date of the last time that a page was patched as a part of incremental reseed - unsigned long ulPagePatchCountOld; // number of pages patched in the database as a part of incremental reseed before the last defrag + JET_UINT32 ulPagePatchCountOld; // number of pages patched in the database as a part of incremental reseed before the last defrag // new fields added on top of the above JET_DBINFOMISC5 - JET_LOGTIME logtimeChecksumPrev; // last checksum pass finish time (UTC - 1900y) - JET_LOGTIME logtimeChecksumStart; // current checksum pass start time (UTC - 1900y) - unsigned long cpgDatabaseChecked; // # of page checked for current pass + JET_LOGTIME logtimeChecksumPrev; // last checksum pass finish time (UTC - 1900y) + JET_LOGTIME logtimeChecksumStart; // current checksum pass start time (UTC - 1900y) + 
JET_UINT32 cpgDatabaseChecked; // # of page checked for current pass // new fields added on top of the above JET_DBINFOMISC6 JET_LOGTIME logtimeLastReAttach; // Last attach time. @@ -2657,43 +2676,43 @@ typedef struct typedef struct { - unsigned long ulGeneration; + JET_UINT32 ulGeneration; JET_SIGNATURE signLog; JET_LOGTIME logtimeCreate; JET_LOGTIME logtimePreviousGeneration; - unsigned long ulFlags; + JET_UINT32 ulFlags; - unsigned long ulVersionMajor; - unsigned long ulVersionMinor; - unsigned long ulVersionUpdate; + JET_UINT32 ulVersionMajor; + JET_UINT32 ulVersionMinor; + JET_UINT32 ulVersionUpdate; - unsigned long cbSectorSize; - unsigned long cbHeader; - unsigned long cbFile; - unsigned long cbDatabasePageSize; + JET_UINT32 cbSectorSize; + JET_UINT32 cbHeader; + JET_UINT32 cbFile; + JET_UINT32 cbDatabasePageSize; } JET_LOGINFOMISC; #if ( JET_VERSION >= 0x0601 ) typedef struct { - unsigned long ulGeneration; + JET_UINT32 ulGeneration; JET_SIGNATURE signLog; JET_LOGTIME logtimeCreate; JET_LOGTIME logtimePreviousGeneration; - unsigned long ulFlags; + JET_UINT32 ulFlags; - unsigned long ulVersionMajor; - unsigned long ulVersionMinor; - unsigned long ulVersionUpdate; + JET_UINT32 ulVersionMajor; + JET_UINT32 ulVersionMinor; + JET_UINT32 ulVersionUpdate; - unsigned long cbSectorSize; - unsigned long cbHeader; - unsigned long cbFile; - unsigned long cbDatabasePageSize; + JET_UINT32 cbSectorSize; + JET_UINT32 cbHeader; + JET_UINT32 cbFile; + JET_UINT32 cbDatabasePageSize; JET_LGPOS lgposCheckpoint; } JET_LOGINFOMISC2; @@ -2703,28 +2722,28 @@ typedef struct #if ( JET_VERSION >= 0x0A01 ) typedef struct { - unsigned long ulGeneration; + JET_UINT32 ulGeneration; JET_SIGNATURE signLog; JET_LOGTIME logtimeCreate; JET_LOGTIME logtimePreviousGeneration; - unsigned long ulFlags; + JET_UINT32 ulFlags; - unsigned long ulVersionMajor; - unsigned long ulVersionUpdateMajor; - unsigned long ulVersionUpdateMinor; + JET_UINT32 ulVersionMajor; + JET_UINT32 ulVersionUpdateMajor; + 
JET_UINT32 ulVersionUpdateMinor; - unsigned long cbSectorSize; - unsigned long cbHeader; - unsigned long cbFile; - unsigned long cbDatabasePageSize; + JET_UINT32 cbSectorSize; + JET_UINT32 cbHeader; + JET_UINT32 cbFile; + JET_UINT32 cbDatabasePageSize; JET_LGPOS lgposCheckpoint; - unsigned long ulVersionMinorDeprecated; // deprecated + JET_UINT32 ulVersionMinorDeprecated; // deprecated - unsigned __int64 checksumPrevLogAllSegments; + JET_UINT64 checksumPrevLogAllSegments; } JET_LOGINFOMISC3; @@ -2789,32 +2808,32 @@ typedef struct typedef struct { - unsigned long cbStruct; /* size of this structure */ - JET_ERR errDefault; /* given no desired special treatment, the client should return this */ - JET_INSTANCE instance; /* the instance for which recovery is run */ + JET_UINT32 cbStruct; // size of this structure + JET_ERR errDefault; // given no desired special treatment, the client should return this + JET_INSTANCE instance; // the instance for which recovery is run - JET_SNT sntUnion; /* indicates the type for the union */ + JET_SNT sntUnion; // indicates the type for the union union { // JET_sntOpenLog struct { - unsigned long cbStruct; /* size of this structure */ - unsigned long lGenNext; /* next log to be replayed */ - unsigned char fCurrentLog:1; /* 0 if log with full / archive name */ - unsigned char eReason; /* the open disposition or reason - JET_OpenLog* */ - unsigned char rgbReserved[6]; /* will be 0 */ - WCHAR * wszLogFile; /* full path of the log file we will open */ - unsigned long cdbinfomisc; /* number of database headers */ - JET_DBINFOMISC7 * rgdbinfomisc; /* array of database headers for attached databases */ + JET_UINT32 cbStruct; // size of this structure + JET_UINT32 lGenNext; // next log to be replayed + JET_BYTE fCurrentLog:1; // 0 if log with full / archive name + JET_BYTE eReason; // the open disposition or reason - JET_OpenLog* + JET_BYTE rgbReserved[6]; // will be 0 + JET_PWSTR wszLogFile; // full path of the log file we will open + 
JET_UINT32 cdbinfomisc; // number of database headers + JET_DBINFOMISC7 * rgdbinfomisc; // array of database headers for attached databases } OpenLog; // JET_sntOpenCheckpoint struct { - unsigned long cbStruct; /* size of this structure */ - WCHAR * wszCheckpoint; /* full path of the checkpoint file we will open */ + JET_UINT32 cbStruct; // size of this structure + JET_PWSTR wszCheckpoint; // full path of the checkpoint file we will open } OpenCheckpoint; // JET_sntOpenDatabase not yet implemented. @@ -2822,59 +2841,59 @@ typedef struct // JET_sntMissingLog struct { - unsigned long cbStruct; /* size of this structure */ - unsigned long lGenMissing; /* next log to be replayed */ - unsigned char fCurrentLog:1; /* 0 if log with full / archive name */ - unsigned char eNextAction; /* if success is returned, what action will we take */ - unsigned char rgbReserved[6]; /* will be 0 */ - WCHAR * wszLogFile; /* full path of the log file we will open */ - unsigned long cdbinfomisc; /* number of database headers */ - JET_DBINFOMISC7 * rgdbinfomisc; /* array of database headers for attached databases */ + JET_UINT32 cbStruct; // size of this structure + JET_UINT32 lGenMissing; // next log to be replayed + JET_BYTE fCurrentLog:1; // 0 if log with full / archive name + JET_BYTE eNextAction; // if success is returned, what action will we take + JET_BYTE rgbReserved[6]; // will be 0 + JET_PWSTR wszLogFile; // full path of the log file we will open + JET_UINT32 cdbinfomisc; // number of database headers + JET_DBINFOMISC7 * rgdbinfomisc; // array of database headers for attached databases } MissingLog; // JET_sntBeginUndo struct { - unsigned long cbStruct; /* size of this structure */ - unsigned long cdbinfomisc; /* number of database headers */ - JET_DBINFOMISC7 * rgdbinfomisc; /* array of database headers for attached databases */ + JET_UINT32 cbStruct; // size of this structure + JET_UINT32 cdbinfomisc; // number of database headers + JET_DBINFOMISC7 * rgdbinfomisc; // array of 
database headers for attached databases } BeginUndo; // JET_sntNotificationEvent struct { - unsigned long cbStruct; /* size of this structure */ - unsigned long EventID; /* ID of the event we would publish */ + JET_UINT32 cbStruct; // size of this structure + JET_UINT32 EventID; // ID of the event we would publish } NotificationEvent; // JET_sntSignalErrorCondition struct { - unsigned long cbStruct; /* size of this structure */ + JET_UINT32 cbStruct; // size of this structure // no extra info beyond errDefault above } SignalErrorCondition; // JET_sntAttachedDb struct { - unsigned long cbStruct; /* size of this structure */ - const WCHAR * wszDbPath; /* full path of the database file */ + JET_UINT32 cbStruct; // size of this structure + JET_PCWSTR wszDbPath; // full path of the database file } AttachedDb; // JET_sntDetachingDb struct { - unsigned long cbStruct; /* size of this structure */ - const WCHAR * wszDbPath; /* full path of the database file */ + JET_UINT32 cbStruct; // size of this structure + JET_PCWSTR wszDbPath; // full path of the database file } DetachingDb; // JET_sntCommitCtx struct { - unsigned long cbStruct; /* size of this structure */ - const void * pbCommitCtx; /* commit context */ - unsigned long cbCommitCtx; /* size of commit context */ - unsigned long fCallbackType; /* type of callback */ + JET_UINT32 cbStruct; // size of this structure + JET_PCVOID pbCommitCtx; // commit context + JET_UINT32 cbCommitCtx; // size of commit context + JET_UINT32 fCallbackType; // type of callback } CommitCtx; }; } JET_RECOVERYCONTROL; @@ -2885,12 +2904,12 @@ typedef struct #endif // JET_VERSION >= 0x0A00 #if ( JET_VERSION >= 0x0600 ) -typedef struct /* Status Notification Message */ +typedef struct // Status Notification Message { - unsigned long cbStruct; /* Size of this structure */ - JET_SNC snc; /* Status Notification Code */ - unsigned long ul; /* Numeric identifier */ - char sz[256]; /* Identifier */ + JET_UINT32 cbStruct; // Size of this structure + 
JET_SNC snc; // Status Notification Code + JET_UINT32 ul; // Numeric identifier + JET_CHAR sz[256]; // Identifier } JET_SNMSG; #endif // JET_VERSION >= 0x0600 @@ -2899,32 +2918,32 @@ typedef struct /* Status Notification Message */ typedef struct // Status Notification Page Patch Request { - unsigned long cbStruct; // Size of this structure - unsigned long pageNumber; // Page being patched - const WCHAR * szLogFile; // Full path of the current logfile + JET_UINT32 cbStruct; // Size of this structure + JET_UINT32 pageNumber; // Page being patched + JET_PCWSTR szLogFile; // Full path of the current logfile JET_INSTANCE instance; // Instance that is running recovery JET_DBINFOMISC7 dbinfomisc; // Database header for the database being patched - const void * pvToken; // Patch token - unsigned long cbToken; // Size of the patch token - const void * pvData; // Patch data (the database page) - unsigned long cbData; // Size of the patch data + JET_PCVOID pvToken; // Patch token + JET_UINT32 cbToken; // Size of the patch token + JET_PCVOID pvData; // Patch data (the database page) + JET_UINT32 cbData; // Size of the patch data JET_DBID dbid; // JET_DBID of database being patched } JET_SNPATCHREQUEST; typedef struct // Status Notification Corrupted Page { - unsigned long cbStruct; // Size of this structure - const WCHAR * wszDatabase; // File name of the database corrupted + JET_UINT32 cbStruct; // Size of this structure + JET_PCWSTR wszDatabase; // File name of the database corrupted JET_DBID dbid; // JET_DBID of database corrupted JET_DBINFOMISC7 dbinfomisc; // Database header for corrupted database - unsigned long pageNumber; // That is corrupted + JET_UINT32 pageNumber; // That is corrupted } JET_SNCORRUPTEDPAGE; #endif // JET_VERSION >= 0x0A01 typedef struct { - unsigned long cpageOwned; // number of owned pages in the streaming file - unsigned long cpageAvail; // number of available pages in the streaming file (subset of cpageOwned) + JET_UINT32 cpageOwned; // number 
of owned pages in the streaming file + JET_UINT32 cpageAvail; // number of available pages in the streaming file (subset of cpageOwned) } JET_STREAMINGFILESPACEINFO; // begin_PubEsent @@ -2934,14 +2953,14 @@ typedef struct // struct JET_THREADSTATS { - unsigned long cbStruct; // size of this struct - unsigned long cPageReferenced; // pages referenced - unsigned long cPageRead; // pages read from disk - unsigned long cPagePreread; // pages preread from disk - unsigned long cPageDirtied; // clean pages modified - unsigned long cPageRedirtied; // dirty pages modified - unsigned long cLogRecord; // log records generated - unsigned long cbLogRecord; // log record bytes generated + JET_UINT32 cbStruct; // size of this struct + JET_UINT32 cPageReferenced; // pages referenced + JET_UINT32 cPageRead; // pages read from disk + JET_UINT32 cPagePreread; // pages preread from disk + JET_UINT32 cPageDirtied; // clean pages modified + JET_UINT32 cPageRedirtied; // dirty pages modified + JET_UINT32 cLogRecord; // log records generated + JET_UINT32 cbLogRecord; // log record bytes generated }; #endif // JET_VERSION >= 0x0600 @@ -2950,16 +2969,16 @@ struct JET_THREADSTATS // struct JET_THREADSTATS2 { - unsigned long cbStruct; // size of this struct - unsigned long cPageReferenced; // pages referenced - unsigned long cPageRead; // pages read from disk - unsigned long cPagePreread; // pages preread from disk - unsigned long cPageDirtied; // clean pages modified - unsigned long cPageRedirtied; // dirty pages modified - unsigned long cLogRecord; // log records generated - unsigned long cbLogRecord; // log record bytes generated - unsigned __int64 cusecPageCacheMiss; // page cache miss latency in microseconds - unsigned long cPageCacheMiss; // page cache misses + JET_UINT32 cbStruct; // size of this struct + JET_UINT32 cPageReferenced; // pages referenced + JET_UINT32 cPageRead; // pages read from disk + JET_UINT32 cPagePreread; // pages preread from disk + JET_UINT32 cPageDirtied; // 
clean pages modified + JET_UINT32 cPageRedirtied; // dirty pages modified + JET_UINT32 cLogRecord; // log records generated + JET_UINT32 cbLogRecord; // log record bytes generated + JET_UINT64 cusecPageCacheMiss; // page cache miss latency in microseconds + JET_UINT32 cPageCacheMiss; // page cache misses }; #endif // JET_VERSION >= 0x0A00 @@ -2968,19 +2987,19 @@ struct JET_THREADSTATS2 // struct JET_THREADSTATS3 { - unsigned long cbStruct; // size of this struct - unsigned long cPageReferenced; // pages referenced - unsigned long cPageRead; // pages read from disk - unsigned long cPagePreread; // pages preread from disk - unsigned long cPageDirtied; // clean pages modified - unsigned long cPageRedirtied; // dirty pages modified - unsigned long cLogRecord; // log records generated - unsigned long cbLogRecord; // log record bytes generated - unsigned __int64 cusecPageCacheMiss; // page cache miss latency in microseconds - unsigned long cPageCacheMiss; // page cache misses - unsigned long cSeparatedLongValueRead; // separated LV reads - unsigned __int64 cusecLongValuePageCacheMiss; // page cache miss latency in microseconds while reading separated LV data - unsigned long cLongValuePageCacheMiss; // page cache misses while reading separated LV data + JET_UINT32 cbStruct; // size of this struct + JET_UINT32 cPageReferenced; // pages referenced + JET_UINT32 cPageRead; // pages read from disk + JET_UINT32 cPagePreread; // pages preread from disk + JET_UINT32 cPageDirtied; // clean pages modified + JET_UINT32 cPageRedirtied; // dirty pages modified + JET_UINT32 cLogRecord; // log records generated + JET_UINT32 cbLogRecord; // log record bytes generated + JET_UINT64 cusecPageCacheMiss; // page cache miss latency in microseconds + JET_UINT32 cPageCacheMiss; // page cache misses + JET_UINT32 cSeparatedLongValueRead; // separated LV reads + JET_UINT64 cusecLongValuePageCacheMiss; // page cache miss latency in microseconds while reading separated LV data + JET_UINT32 
cLongValuePageCacheMiss; // page cache misses while reading separated LV data }; #endif // JET_VERSION >= 0x0A01 // end_PubEsent @@ -2990,34 +3009,34 @@ struct JET_THREADSTATS3 // struct JET_THREADSTATS4 { - unsigned long cbStruct; // size of this struct - unsigned long cPageReferenced; // pages referenced - unsigned long cPageRead; // pages read from disk - unsigned long cPagePreread; // pages preread from disk - unsigned long cPageDirtied; // clean pages modified - unsigned long cPageRedirtied; // dirty pages modified - unsigned long cLogRecord; // log records generated - unsigned long cbLogRecord; // log record bytes generated - unsigned __int64 cusecPageCacheMiss; // page cache miss latency in microseconds - unsigned long cPageCacheMiss; // page cache misses - unsigned long cSeparatedLongValueRead; // separated LV reads - unsigned __int64 cusecLongValuePageCacheMiss; // page cache miss latency in microseconds while reading separated LV data - unsigned long cLongValuePageCacheMiss; // page cache misses while reading separated LV data - unsigned long cSeparatedLongValueCreated; // separated LV creations - unsigned long cPageUniqueCacheHits; // number of unique pages for which requests could be fulfilled by the buffer cache - unsigned long cPageUniqueCacheRequests; // number of unique pages for which requests were made to the buffer cache - unsigned long cDatabaseReads; // number of database reads from disk - unsigned long cSumDatabaseReadQueueDepthImpact; // sum of the impact on disk queue depth made by each database read from disk - unsigned long cSumDatabaseReadQueueDepth; // sum of the actual disk queue depths experienced by each database read from disk - unsigned __int64 cusecWait; // elapsed thread wait time in microseconds - unsigned long cWait; // number of thread waits - unsigned long cNodesFlagDeleted; // number of nodes marked for delete - unsigned long cbNodesFlagDeleted; // size of nodes marked for delete - unsigned long cPageTableAllocated; // number 
of pages allocated by a table from the database - unsigned long cPageTableReleased; // number of pages released by a table to the database - unsigned long cPageUpdateAllocated; // number of pages allocated as a side effect of an update - unsigned long cPageUpdateReleased; // number of pages released as a side effect of an update - unsigned long cPageUniqueModified; // number of unique pages modified + JET_UINT32 cbStruct; // size of this struct + JET_UINT32 cPageReferenced; // pages referenced + JET_UINT32 cPageRead; // pages read from disk + JET_UINT32 cPagePreread; // pages preread from disk + JET_UINT32 cPageDirtied; // clean pages modified + JET_UINT32 cPageRedirtied; // dirty pages modified + JET_UINT32 cLogRecord; // log records generated + JET_UINT32 cbLogRecord; // log record bytes generated + JET_UINT64 cusecPageCacheMiss; // page cache miss latency in microseconds + JET_UINT32 cPageCacheMiss; // page cache misses + JET_UINT32 cSeparatedLongValueRead; // separated LV reads + JET_UINT64 cusecLongValuePageCacheMiss; // page cache miss latency in microseconds while reading separated LV data + JET_UINT32 cLongValuePageCacheMiss; // page cache misses while reading separated LV data + JET_UINT32 cSeparatedLongValueCreated; // separated LV creations + JET_UINT32 cPageUniqueCacheHits; // number of unique pages for which requests could be fulfilled by the buffer cache + JET_UINT32 cPageUniqueCacheRequests; // number of unique pages for which requests were made to the buffer cache + JET_UINT32 cDatabaseReads; // number of database reads from disk + JET_UINT32 cSumDatabaseReadQueueDepthImpact; // sum of the impact on disk queue depth made by each database read from disk + JET_UINT32 cSumDatabaseReadQueueDepth; // sum of the actual disk queue depths experienced by each database read from disk + JET_UINT64 cusecWait; // elapsed thread wait time in microseconds + JET_UINT32 cWait; // number of thread waits + JET_UINT32 cNodesFlagDeleted; // number of nodes marked for 
delete + JET_UINT32 cbNodesFlagDeleted; // size of nodes marked for delete + JET_UINT32 cPageTableAllocated; // number of pages allocated by a table from the database + JET_UINT32 cPageTableReleased; // number of pages released by a table to the database + JET_UINT32 cPageUpdateAllocated; // number of pages allocated as a side effect of an update + JET_UINT32 cPageUpdateReleased; // number of pages released as a side effect of an update + JET_UINT32 cPageUniqueModified; // number of unique pages modified }; #endif // JET_VERSION >= 0x0A01 @@ -3100,10 +3119,10 @@ typedef enum typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_RSTMAP_A * rgrstmap; - long crstmap; + JET_INT32 crstmap; JET_LGPOS lgposStop; JET_LOGTIME logtimeStop; @@ -3113,10 +3132,10 @@ typedef struct typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_RSTMAP_W * rgrstmap; - long crstmap; + JET_INT32 crstmap; JET_LGPOS lgposStop; JET_LOGTIME logtimeStop; @@ -3138,30 +3157,30 @@ typedef struct typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_RSTMAP2_A * rgrstmap; - long crstmap; + JET_INT32 crstmap; JET_LGPOS lgposStop; JET_LOGTIME logtimeStop; JET_PFNINITCALLBACK pfnCallback; - void * pvCallbackContext; + JET_PVOID pvCallbackContext; } JET_RSTINFO2_A; typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_RSTMAP2_W * rgrstmap; - long crstmap; + JET_INT32 crstmap; JET_LGPOS lgposStop; JET_LOGTIME logtimeStop; JET_PFNINITCALLBACK pfnCallback; - void * pvCallbackContext; + JET_PVOID pvCallbackContext; } JET_RSTINFO2_W; #ifdef JET_UNICODE @@ -3219,61 +3238,61 @@ typedef enum // typedef struct _BTREE_STATS_BASIC_CATALOG { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_BTREETYPE eType; - WCHAR rgName[64]; - unsigned long objidFDP; - unsigned long pgnoFDP; - JET_SPACEHINTS * pSpaceHints; + JET_WCHAR rgName[64]; + JET_UINT32 objidFDP; + JET_UINT32 pgnoFDP; + JET_SPACEHINTS * pSpaceHints; } BTREE_STATS_BASIC_CATALOG; typedef struct 
_BTREE_SPACE_EXTENT_INFO { - unsigned long iPool; - unsigned long pgnoLast; - unsigned long cpgExtent; - unsigned long pgnoSpaceNode; + JET_UINT32 iPool; + JET_UINT32 pgnoLast; + JET_UINT32 cpgExtent; + JET_UINT32 pgnoSpaceNode; } BTREE_SPACE_EXTENT_INFO; // Retrieved with JET_bitSpaceInfoSpaceTrees // typedef struct _BTREE_STATS_SPACE_TREES { - unsigned long cbStruct; - unsigned long cpgPrimary; - unsigned long cpgLastAlloc; - unsigned long fMultiExtent; - unsigned long pgnoOE; - unsigned long pgnoAE; - unsigned long cpgOwned; - unsigned long cpgOwnedCache; - unsigned long cpgAvailable; - unsigned long cpgAvailableCache; - unsigned long cpgSpaceTreeAvailable; - unsigned long cpgReserved; - unsigned long cpgShelved; - int fAutoIncPresents; - unsigned __int64 qwAutoInc; - unsigned long cOwnedExtents; - _Field_size_opt_(cOwnedExtents) BTREE_SPACE_EXTENT_INFO * prgOwnedExtents; - unsigned long cAvailExtents; - _Field_size_opt_(cAvailExtents) BTREE_SPACE_EXTENT_INFO * prgAvailExtents; + JET_UINT32 cbStruct; + JET_UINT32 cpgPrimary; + JET_UINT32 cpgLastAlloc; + JET_UINT32 fMultiExtent; + JET_UINT32 pgnoOE; + JET_UINT32 pgnoAE; + JET_UINT32 cpgOwned; + JET_UINT32 cpgOwnedCache; + JET_UINT32 cpgAvailable; + JET_UINT32 cpgAvailableCache; + JET_UINT32 cpgSpaceTreeAvailable; + JET_UINT32 cpgReserved; + JET_UINT32 cpgShelved; + JET_INT32 fAutoIncPresents; + JET_UINT64 qwAutoInc; + JET_UINT32 cOwnedExtents; + _Field_size_opt_(cOwnedExtents) BTREE_SPACE_EXTENT_INFO * prgOwnedExtents; + JET_UINT32 cAvailExtents; + _Field_size_opt_(cAvailExtents) BTREE_SPACE_EXTENT_INFO * prgAvailExtents; } BTREE_STATS_SPACE_TREES; // Retrieved with JET_bitSpaceInfoFullWalk for data page. 
// typedef struct { - unsigned long cbStruct; - JET_HISTO * phistoFreeBytes; // per page - JET_HISTO * phistoNodeCounts; // per page (not including TAG 0) - JET_HISTO * phistoKeySizes; // per node - JET_HISTO * phistoDataSizes; // per node - JET_HISTO * phistoKeyCompression; // per compressed node - JET_HISTO * phistoResvTagSizes; // per reserved tag - JET_HISTO * phistoUnreclaimedBytes; // per deleted node + JET_UINT32 cbStruct; + JET_HISTO * phistoFreeBytes; // per page + JET_HISTO * phistoNodeCounts; // per page (not including TAG 0) + JET_HISTO * phistoKeySizes; // per node + JET_HISTO * phistoDataSizes; // per node + JET_HISTO * phistoKeyCompression; // per compressed node + JET_HISTO * phistoResvTagSizes; // per reserved tag + JET_HISTO * phistoUnreclaimedBytes; // per deleted node #if ( JET_VERSION >= 0x0602 ) - __int64 cVersionedNodes; // node accumulation + JET_INT64 cVersionedNodes; // node accumulation #endif } BTREE_STATS_PAGE_SPACE; @@ -3298,13 +3317,13 @@ typedef struct // OE:3-pg,many fFalse? 
2 1 2 3 0 typedef struct _BTREE_STATS_PARENT_OF_LEAF { - unsigned long cbStruct; - unsigned long fEmpty; - unsigned long cpgInternal; - unsigned long cpgData; - unsigned long cDepth; + JET_UINT32 cbStruct; + JET_UINT32 fEmpty; + JET_UINT32 cpgInternal; + JET_UINT32 cpgData; + JET_UINT32 cDepth; JET_HISTO * phistoIOContiguousRuns; - unsigned long cForwardScans; + JET_UINT32 cForwardScans; BTREE_STATS_PAGE_SPACE * pInternalPageStats; } BTREE_STATS_PARENT_OF_LEAF; @@ -3312,13 +3331,13 @@ typedef struct _BTREE_STATS_PARENT_OF_LEAF typedef struct _BTREE_STATS_LV { - unsigned long cbStruct; - __int64 cLVRefs; - __int64 cCorruptLVs; - __int64 cSeparatedRootChunks; - __int64 cPartiallyDeletedLVs; - unsigned __int64 lidMax; - int cbLVChunkMax; + JET_UINT32 cbStruct; + JET_INT64 cLVRefs; + JET_INT64 cCorruptLVs; + JET_INT64 cSeparatedRootChunks; + JET_INT64 cPartiallyDeletedLVs; + JET_UINT64 lidMax; + JET_INT32 cbLVChunkMax; JET_HISTO * phistoLVSize; JET_HISTO * phistoLVComp; JET_HISTO * phistoLVRatio; @@ -3335,8 +3354,8 @@ typedef struct _BTREE_STATS // // Version and specified data. // - unsigned long cbStruct; - unsigned long grbitData; + JET_UINT32 cbStruct; + JET_UINT32 grbitData; // // ESE's B+ Trees / space are heirarchical. 
// @@ -3351,10 +3370,10 @@ typedef struct _BTREE_STATS #if ( JET_VERSION >= 0x0602 ) BTREE_STATS_LV * pLvData; #endif - unsigned long fPgnoFDPRootDelete; + JET_UINT32 fPgnoFDPRootDelete; } BTREE_STATS; -typedef JET_ERR (JET_API *JET_PFNSPACEDATA)( +typedef JET_ERR (JET_API * JET_PFNSPACEDATA)( _In_ BTREE_STATS * pBTreeStats, _In_ JET_API_PTR pvContext ); #endif // JET_VERSION >= 0x0601 @@ -3362,11 +3381,11 @@ typedef JET_ERR (JET_API *JET_PFNSPACEDATA)( //typedef struct // { -// unsigned long cDiscont; -// unsigned long cUnfixedMessyPage; -// unsigned long centriesLT; -// unsigned long centriesTotal; -// unsigned long cpgCompactFreed; +// JET_UINT32 cDiscont; +// JET_UINT32 cUnfixedMessyPage; +// JET_UINT32 centriesLT; +// JET_UINT32 centriesTotal; +// JET_UINT32 cpgCompactFreed; // } JET_OLCSTAT; // begin_PubEsent @@ -3502,12 +3521,12 @@ typedef enum // be populated by all error levels. typedef struct { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_ERR errValue; // The error value for the requested info level. JET_ERRCAT errcatMostSpecific; // The most specific category of the error. - unsigned char rgCategoricalHierarchy[8]; // Hierarchy of error categories. Position 0 is the highest level in the hierarchy, and the rest are JET_errcatUnknown. - unsigned long lSourceLine; // The source file line for the requested info level. - WCHAR rgszSourceFile[64]; // The source file name for the requested info level. + JET_BYTE rgCategoricalHierarchy[8]; // Hierarchy of error categories. Position 0 is the highest level in the hierarchy, and the rest are JET_errcatUnknown. + JET_UINT32 lSourceLine; // The source file line for the requested info level. + JET_WCHAR rgszSourceFile[64]; // The source file name for the requested info level. 
} JET_ERRINFOBASIC_W; // grbits for JET_PFNDURABLECOMMITCALLBACK @@ -3519,50 +3538,50 @@ typedef struct typedef struct { JET_SIGNATURE signLog; - int reserved; // for packing so int64 below is 8-byte aligned on 32-bits despite the pshpack4 above - __int64 commitId; + JET_INT32 reserved; // for packing so int64 below is 8-byte aligned on 32-bits despite the pshpack4 above + JET_INT64 commitId; } JET_COMMIT_ID; // assert that commit-id is 8-byte aligned so managed interop works correctly // C_ASSERT( offsetof( JET_COMMIT_ID, commitId ) % 8 == 0 ); // callback for JET_paramDurableCommitCallback -typedef JET_ERR (JET_API *JET_PFNDURABLECOMMITCALLBACK)( - _In_ JET_INSTANCE instance, - _In_ JET_COMMIT_ID *pCommitIdSeen, - _In_ JET_GRBIT grbit ); +typedef JET_ERR (JET_API * JET_PFNDURABLECOMMITCALLBACK)( + _In_ JET_INSTANCE instance, + _In_ JET_COMMIT_ID * pCommitIdSeen, + _In_ JET_GRBIT grbit ); #endif // JET_VERSION >= 0x0602 // end_PubEsent typedef struct { - long lRBSGeneration; // Revert snapshot generation. + JET_INT32 lRBSGeneration; // Revert snapshot generation. JET_LOGTIME logtimeCreate; // date time file creation JET_LOGTIME logtimeCreatePrevRBS; // date time prev file creation - unsigned long ulMajor; // major version number - unsigned long ulMinor; // minor version number + JET_UINT32 ulMajor; // major version number + JET_UINT32 ulMinor; // minor version number - unsigned long long cbLogicalFileSize; // Logical file size + JET_UINT64 cbLogicalFileSize; // Logical file size } JET_RBSINFOMISC; typedef struct { - long lGenMinRevertStart; // Min log generation across databases at start of revert. - long lGenMaxRevertStart; // Max log generation across databases at start of revert. + JET_INT32 lGenMinRevertStart; // Min log generation across databases at start of revert. + JET_INT32 lGenMaxRevertStart; // Max log generation across databases at start of revert. - long lGenMinRevertEnd; // Min log generation across databases at end of revert. 
- long lGenMaxRevertEnd; // Max log generation across databases at end of revert. + JET_INT32 lGenMinRevertEnd; // Min log generation across databases at end of revert. + JET_INT32 lGenMaxRevertEnd; // Max log generation across databases at end of revert. JET_LOGTIME logtimeRevertFrom; // The time we started reverting from. We will skip adding reverting to time as the caller already gets that info as part of prepare call. - unsigned long long cSecRevert; // Total secs spent in revert process. - unsigned long long cPagesReverted; // Total pages reverted across all the database files as part of the revert. + JET_UINT64 cSecRevert; // Total secs spent in revert process. + JET_UINT64 cPagesReverted; // Total pages reverted across all the database files as part of the revert. - long lGenRBSMaxApplied; // Max revert snapshot generation applied during revert. - long lGenRBSMinApplied; // Min revert snapshot generation applied during revert. + JET_INT32 lGenRBSMaxApplied; // Max revert snapshot generation applied during revert. + JET_INT32 lGenRBSMinApplied; // Min revert snapshot generation applied during revert. 
} JET_RBSREVERTINFOMISC; // begin_PubEsent @@ -4284,11 +4303,11 @@ typedef enum typedef struct { - unsigned long ulUserID; - unsigned char nOperationID; - unsigned char nOperationType; - unsigned char nClientType; - unsigned char fFlags; + JET_UINT32 ulUserID; + JET_BYTE nOperationID; + JET_BYTE nOperationType; + JET_BYTE nClientType; + JET_BYTE fFlags; } JET_OPERATIONCONTEXT; #endif // JET_VERSION >= 0x0A00 @@ -4751,42 +4770,42 @@ typedef struct #define bitTableUpdatableDuringRecovery 0x40000000 /* INTERNAL USE ONLY */ // begin_PubEsent -#define JET_bitTableClassMask 0x001F0000 /* table stats class mask */ -#define JET_bitTableClassNone 0x00000000 /* table belongs to no stats class (default) */ -#define JET_bitTableClass1 0x00010000 /* table belongs to stats class 1 */ -#define JET_bitTableClass2 0x00020000 /* table belongs to stats class 2 */ -#define JET_bitTableClass3 0x00030000 /* table belongs to stats class 3 */ -#define JET_bitTableClass4 0x00040000 /* table belongs to stats class 4 */ -#define JET_bitTableClass5 0x00050000 /* table belongs to stats class 5 */ -#define JET_bitTableClass6 0x00060000 /* table belongs to stats class 6 */ -#define JET_bitTableClass7 0x00070000 /* table belongs to stats class 7 */ -#define JET_bitTableClass8 0x00080000 /* table belongs to stats class 8 */ -#define JET_bitTableClass9 0x00090000 /* table belongs to stats class 9 */ -#define JET_bitTableClass10 0x000A0000 /* table belongs to stats class 10 */ -#define JET_bitTableClass11 0x000B0000 /* table belongs to stats class 11 */ -#define JET_bitTableClass12 0x000C0000 /* table belongs to stats class 12 */ -#define JET_bitTableClass13 0x000D0000 /* table belongs to stats class 13 */ -#define JET_bitTableClass14 0x000E0000 /* table belongs to stats class 14 */ -#define JET_bitTableClass15 0x000F0000 /* table belongs to stats class 15 */ +#define JET_bitTableClassMask 0x001F0000 /* table stats class mask */ +#define JET_bitTableClassNone 0x00000000 /* table belongs to no stats 
class (default) */ +#define JET_bitTableClass1 0x00010000 /* table belongs to stats class 1 */ +#define JET_bitTableClass2 0x00020000 /* table belongs to stats class 2 */ +#define JET_bitTableClass3 0x00030000 /* table belongs to stats class 3 */ +#define JET_bitTableClass4 0x00040000 /* table belongs to stats class 4 */ +#define JET_bitTableClass5 0x00050000 /* table belongs to stats class 5 */ +#define JET_bitTableClass6 0x00060000 /* table belongs to stats class 6 */ +#define JET_bitTableClass7 0x00070000 /* table belongs to stats class 7 */ +#define JET_bitTableClass8 0x00080000 /* table belongs to stats class 8 */ +#define JET_bitTableClass9 0x00090000 /* table belongs to stats class 9 */ +#define JET_bitTableClass10 0x000A0000 /* table belongs to stats class 10 */ +#define JET_bitTableClass11 0x000B0000 /* table belongs to stats class 11 */ +#define JET_bitTableClass12 0x000C0000 /* table belongs to stats class 12 */ +#define JET_bitTableClass13 0x000D0000 /* table belongs to stats class 13 */ +#define JET_bitTableClass14 0x000E0000 /* table belongs to stats class 14 */ +#define JET_bitTableClass15 0x000F0000 /* table belongs to stats class 15 */ // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) -#define JET_bitTableClass16 0x00100000 /* table belongs to stats class 16 */ -#define JET_bitTableClass17 0x00110000 /* table belongs to stats class 17 */ -#define JET_bitTableClass18 0x00120000 /* table belongs to stats class 18 */ -#define JET_bitTableClass19 0x00130000 /* table belongs to stats class 19 */ -#define JET_bitTableClass20 0x00140000 /* table belongs to stats class 20 */ -#define JET_bitTableClass21 0x00150000 /* table belongs to stats class 21 */ -#define JET_bitTableClass22 0x00160000 /* table belongs to stats class 22 */ -#define JET_bitTableClass23 0x00170000 /* table belongs to stats class 23 */ -#define JET_bitTableClass24 0x00180000 /* table belongs to stats class 24 */ -#define JET_bitTableClass25 0x00190000 /* table belongs to stats class 25 */ 
-#define JET_bitTableClass26 0x001A0000 /* table belongs to stats class 26 */ -#define JET_bitTableClass27 0x001B0000 /* table belongs to stats class 27 */ -#define JET_bitTableClass28 0x001C0000 /* table belongs to stats class 28 */ -#define JET_bitTableClass29 0x001D0000 /* table belongs to stats class 29 */ -#define JET_bitTableClass30 0x001E0000 /* table belongs to stats class 30 */ -#define JET_bitTableClass31 0x001F0000 /* table belongs to stats class 31 */ +#define JET_bitTableClass16 0x00100000 /* table belongs to stats class 16 */ +#define JET_bitTableClass17 0x00110000 /* table belongs to stats class 17 */ +#define JET_bitTableClass18 0x00120000 /* table belongs to stats class 18 */ +#define JET_bitTableClass19 0x00130000 /* table belongs to stats class 19 */ +#define JET_bitTableClass20 0x00140000 /* table belongs to stats class 20 */ +#define JET_bitTableClass21 0x00150000 /* table belongs to stats class 21 */ +#define JET_bitTableClass22 0x00160000 /* table belongs to stats class 22 */ +#define JET_bitTableClass23 0x00170000 /* table belongs to stats class 23 */ +#define JET_bitTableClass24 0x00180000 /* table belongs to stats class 24 */ +#define JET_bitTableClass25 0x00190000 /* table belongs to stats class 25 */ +#define JET_bitTableClass26 0x001A0000 /* table belongs to stats class 26 */ +#define JET_bitTableClass27 0x001B0000 /* table belongs to stats class 27 */ +#define JET_bitTableClass28 0x001C0000 /* table belongs to stats class 28 */ +#define JET_bitTableClass29 0x001D0000 /* table belongs to stats class 29 */ +#define JET_bitTableClass30 0x001E0000 /* table belongs to stats class 30 */ +#define JET_bitTableClass31 0x001F0000 /* table belongs to stats class 31 */ #endif // JET_VERSION >= 0x0A01 // begin_PubEsent @@ -4863,7 +4882,7 @@ typedef struct #if ( JET_VERSION >= 0x0601 ) - /* Space Hint Flags / JET_SPACEHINTS */ + /* Space Hint Flags / JET_SPACEHINTS */ // Generic #define JET_bitSpaceHintsUtilizeParentSpace 0x00000001 // This changes 
the internal allocation policy to get space hierarchically from a B-Tree's immediate parent. @@ -4894,28 +4913,28 @@ typedef struct typedef struct { JET_COLUMNID columnid; - const void *pvData; - unsigned long cbData; + JET_PCVOID pvData; + JET_UINT32 cbData; JET_GRBIT grbit; - unsigned long ibLongValue; - unsigned long itagSequence; + JET_UINT32 ibLongValue; + JET_UINT32 itagSequence; JET_ERR err; } JET_SETCOLUMN; #if ( JET_VERSION >= 0x0501 ) typedef struct { - unsigned long paramid; + JET_UINT32 paramid; JET_API_PTR lParam; - const char *sz; + JET_PCSTR sz; JET_ERR err; } JET_SETSYSPARAM_A; typedef struct { - unsigned long paramid; + JET_UINT32 paramid; JET_API_PTR lParam; - const WCHAR *sz; + JET_PCWSTR sz; JET_ERR err; } JET_SETSYSPARAM_W; @@ -4985,9 +5004,9 @@ typedef struct // end_PubEsent #define JET_bitRetrieveLongId 0x00000040 #define JET_bitRetrieveLongValueRefCount 0x00000080 /* for testing use only */ -// #define JET_bitRetrieveSLVAsSLVInfo 0x00000100 /* internal use only */ +// #define JET_bitRetrieveSLVAsSLVInfo 0x00000100 /* internal use only */ - /* Flags for JetRetrieveColumn when the SLV Provider is enabled */ + /* Flags for JetRetrieveColumn when the SLV Provider is enabled */ // #define JET_bitRetrieveSLVAsSLVFile 0x00000200 /* retrieve SLV as an SLV File handle */ // #define JET_bitRetrieveSLVAsSLVEA 0x00000400 /* retrieve SLV as an SLV EA list */ @@ -5031,12 +5050,12 @@ typedef struct typedef struct { JET_COLUMNID columnid; - void *pvData; - unsigned long cbData; - unsigned long cbActual; + JET_PVOID pvData; + JET_UINT32 cbData; + JET_UINT32 cbActual; JET_GRBIT grbit; - unsigned long ibLongValue; - unsigned long itagSequence; + JET_UINT32 ibLongValue; + JET_UINT32 itagSequence; JET_COLUMNID columnidNextTagged; JET_ERR err; } JET_RETRIEVECOLUMN; @@ -5046,17 +5065,17 @@ typedef struct typedef struct { JET_COLUMNID columnid; - unsigned short cMultiValues; + JET_UINT16 cMultiValues; union { - unsigned short usFlags; + JET_UINT16 usFlags; struct { 
- unsigned short fLongValue:1; // is column LongText/Binary? - unsigned short fDefaultValue:1; // was a default value retrieved? - unsigned short fNullOverride:1; // was there an explicit null to override a default value? - unsigned short fDerived:1; // was column derived from template table? + JET_UINT16 fLongValue:1; // is column LongText/Binary? + JET_UINT16 fDefaultValue:1; // was a default value retrieved? + JET_UINT16 fNullOverride:1; // was there an explicit null to override a default value? + JET_UINT16 fDerived:1; // was column derived from template table? }; }; } JET_RETRIEVEMULTIVALUECOUNT; @@ -5092,16 +5111,16 @@ typedef struct typedef struct { JET_COLUMNID columnid; - unsigned long ctagSequence; - unsigned long* rgtagSequence; + JET_UINT32 ctagSequence; + JET_UINT32 * rgtagSequence; } JET_ENUMCOLUMNID; typedef struct { - unsigned long itagSequence; + JET_UINT32 itagSequence; JET_ERR err; - unsigned long cbData; - void* pvData; + JET_UINT32 cbData; + JET_PVOID pvData; } JET_ENUMCOLUMNVALUE; typedef struct @@ -5110,25 +5129,25 @@ typedef struct JET_ERR err; union { - struct /* err != JET_wrnColumnSingleValue */ + struct // err != JET_wrnColumnSingleValue { - unsigned long cEnumColumnValue; + JET_UINT32 cEnumColumnValue; JET_ENUMCOLUMNVALUE* rgEnumColumnValue; }; - struct /* err == JET_wrnColumnSingleValue */ + struct // err == JET_wrnColumnSingleValue { - unsigned long cbData; - void* pvData; + JET_UINT32 cbData; + JET_PVOID pvData; }; }; } JET_ENUMCOLUMN; /* Realloc callback for JetEnumerateColumns */ -typedef void* (JET_API *JET_PFNREALLOC)( - _In_opt_ void * pvContext, - _In_opt_ void * pv, - _In_ unsigned long cb ); +typedef JET_PVOID (JET_API * JET_PFNREALLOC)( + _In_opt_ JET_PVOID pvContext, + _In_opt_ JET_PVOID pv, + _In_ JET_UINT32 cb ); #endif // JET_VERSION >= 0x0501 @@ -5163,14 +5182,14 @@ typedef void* (JET_API *JET_PFNREALLOC)( typedef struct { - unsigned __int64 cbData; // user data in record - unsigned __int64 cbLongValueData; // user data 
associated with the record but stored in the long-value tree (NOTE: does NOT count intrinsic long-values) - unsigned __int64 cbOverhead; // record overhead - unsigned __int64 cbLongValueOverhead; // overhead of long-value data (NOTE: does not count intrinsic long-values) - unsigned __int64 cNonTaggedColumns; // total number of fixed/variable columns - unsigned __int64 cTaggedColumns; // total number of tagged columns - unsigned __int64 cLongValues; // total number of values stored in the long-value tree for this record (NOTE: does NOT count intrinsic long-values) - unsigned __int64 cMultiValues; // total number of values beyond the first for each column in the record + JET_UINT64 cbData; // user data in record + JET_UINT64 cbLongValueData; // user data associated with the record but stored in the long-value tree (NOTE: does NOT count intrinsic long-values) + JET_UINT64 cbOverhead; // record overhead + JET_UINT64 cbLongValueOverhead; // overhead of long-value data (NOTE: does not count intrinsic long-values) + JET_UINT64 cNonTaggedColumns; // total number of fixed/variable columns + JET_UINT64 cTaggedColumns; // total number of tagged columns + JET_UINT64 cLongValues; // total number of values stored in the long-value tree for this record (NOTE: does NOT count intrinsic long-values) + JET_UINT64 cMultiValues; // total number of values beyond the first for each column in the record } JET_RECSIZE; #endif // JET_VERSION >= 0x0600 @@ -5179,14 +5198,14 @@ typedef struct #if ( JET_VERSION >= 0x0600 ) typedef struct tagJET_PAGEINFO { - unsigned long pgno; // pgno for the page. 
must be passed in - unsigned long fPageIsInitialized:1; // false if the page is zeroed - unsigned long fCorrectableError:1; // correctable error found on page - unsigned __int64 checksumActual; // checksum stored on the page - unsigned __int64 checksumExpected; // checksum expected for the page - unsigned __int64 dbtime; // dbtime on the page - unsigned __int64 structureChecksum; // checksum of the page structure - unsigned __int64 flags; // currently unused + JET_UINT32 pgno; // pgno for the page. must be passed in + JET_UINT32 fPageIsInitialized:1; // false if the page is zeroed + JET_UINT32 fCorrectableError:1; // correctable error found on page + JET_UINT64 checksumActual; // checksum stored on the page + JET_UINT64 checksumExpected; // checksum expected for the page + JET_UINT64 dbtime; // dbtime on the page + JET_UINT64 structureChecksum; // checksum of the page structure + JET_UINT64 flags; // currently unused } JET_PAGEINFO; #endif // JET_VERSION >= 0x0600 @@ -5194,38 +5213,38 @@ typedef struct tagJET_PAGEINFO #if ( JET_VERSION >= 0x0601 ) typedef struct { - unsigned __int64 cbData; // user data in record - unsigned __int64 cbLongValueData; // user data associated with the record but stored in the long-value tree (NOTE: does NOT count intrinsic long-values) - unsigned __int64 cbOverhead; // record overhead - unsigned __int64 cbLongValueOverhead; // overhead of long-value data (NOTE: does not count intrinsic long-values) - unsigned __int64 cNonTaggedColumns; // total number of fixed/variable columns - unsigned __int64 cTaggedColumns; // total number of tagged columns - unsigned __int64 cLongValues; // total number of values stored in the long-value tree for this record (NOTE: does NOT count intrinsic long-values) - unsigned __int64 cMultiValues; // total number of values beyond the first for each column in the record - unsigned __int64 cCompressedColumns; // total number of columns which are compressed - unsigned __int64 cbDataCompressed; // compressed size 
of user data in record (same as cbData if no intrinsic long-values are compressed) - unsigned __int64 cbLongValueDataCompressed; // compressed size of user data in the long-value tree (same as cbLongValue data if no separated long values are compressed) + JET_UINT64 cbData; // user data in record + JET_UINT64 cbLongValueData; // user data associated with the record but stored in the long-value tree (NOTE: does NOT count intrinsic long-values) + JET_UINT64 cbOverhead; // record overhead + JET_UINT64 cbLongValueOverhead; // overhead of long-value data (NOTE: does not count intrinsic long-values) + JET_UINT64 cNonTaggedColumns; // total number of fixed/variable columns + JET_UINT64 cTaggedColumns; // total number of tagged columns + JET_UINT64 cLongValues; // total number of values stored in the long-value tree for this record (NOTE: does NOT count intrinsic long-values) + JET_UINT64 cMultiValues; // total number of values beyond the first for each column in the record + JET_UINT64 cCompressedColumns; // total number of columns which are compressed + JET_UINT64 cbDataCompressed; // compressed size of user data in record (same as cbData if no intrinsic long-values are compressed) + JET_UINT64 cbLongValueDataCompressed; // compressed size of user data in the long-value tree (same as cbLongValue data if no separated long values are compressed) } JET_RECSIZE2; #endif // JET_VERSION >= 0x0601 // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) typedef struct { - unsigned __int64 cbData; // user data in record - unsigned __int64 cbLongValueData; // user data associated with the record but stored in the long-value tree (NOTE: does NOT count intrinsic long-values) - unsigned __int64 cbOverhead; // record overhead - unsigned __int64 cbLongValueOverhead; // overhead of long-value data (NOTE: does not count intrinsic long-values) - unsigned __int64 cNonTaggedColumns; // total number of fixed/variable columns - unsigned __int64 cTaggedColumns; // total number of tagged columns - 
unsigned __int64 cLongValues; // total number of values stored in the long-value tree for this record (NOTE: does NOT count intrinsic long-values) - unsigned __int64 cMultiValues; // total number of values beyond the first for each column in the record - unsigned __int64 cCompressedColumns; // total number of columns which are compressed - unsigned __int64 cbDataCompressed; // compressed size of user data in record (same as cbData if no intrinsic long-values are compressed) - unsigned __int64 cbLongValueDataCompressed; // compressed size of user data in the long-value tree (same as cbLongValue data if no separated long values are compressed) - unsigned __int64 cbIntrinsicLongValueData; // user data stored in intrinsic LVs (in the record). - unsigned __int64 cbIntrinsicLongValueDataCompressed; // compressed size of user data stored in intrinsic LVs (in the record). - unsigned __int64 cIntrinsicLongValues; // total number of intrinsic LVs stored in the record. - unsigned __int64 cbKey; // Key size in bytes. Doesn't include storage overhead. Does include key normalization overhead. 
+ JET_UINT64 cbData; // user data in record + JET_UINT64 cbLongValueData; // user data associated with the record but stored in the long-value tree (NOTE: does NOT count intrinsic long-values) + JET_UINT64 cbOverhead; // record overhead + JET_UINT64 cbLongValueOverhead; // overhead of long-value data (NOTE: does not count intrinsic long-values) + JET_UINT64 cNonTaggedColumns; // total number of fixed/variable columns + JET_UINT64 cTaggedColumns; // total number of tagged columns + JET_UINT64 cLongValues; // total number of values stored in the long-value tree for this record (NOTE: does NOT count intrinsic long-values) + JET_UINT64 cMultiValues; // total number of values beyond the first for each column in the record + JET_UINT64 cCompressedColumns; // total number of columns which are compressed + JET_UINT64 cbDataCompressed; // compressed size of user data in record (same as cbData if no intrinsic long-values are compressed) + JET_UINT64 cbLongValueDataCompressed; // compressed size of user data in the long-value tree (same as cbLongValue data if no separated long values are compressed) + JET_UINT64 cbIntrinsicLongValueData; // user data stored in intrinsic LVs (in the record). + JET_UINT64 cbIntrinsicLongValueDataCompressed; // compressed size of user data stored in intrinsic LVs (in the record). + JET_UINT64 cIntrinsicLongValues; // total number of intrinsic LVs stored in the record. + JET_UINT64 cbKey; // Key size in bytes. Doesn't include storage overhead. Does include key normalization overhead. 
} JET_RECSIZE3; #endif // JET_VERSION >= 0x0A01 @@ -5234,8 +5253,8 @@ typedef struct typedef struct tagJET_PAGEINFO2 { JET_PAGEINFO pageInfo; - unsigned __int64 rgChecksumActual[ 3 ]; // more checksum stored on the page - unsigned __int64 rgChecksumExpected[ 3]; // more checksum expected for the page + JET_UINT64 rgChecksumActual[ 3 ]; // more checksum stored on the page + JET_UINT64 rgChecksumExpected[ 3]; // more checksum expected for the page } JET_PAGEINFO2; #endif // JET_VERSION >= 0x0601 @@ -5292,7 +5311,7 @@ typedef struct tagJET_PAGEINFO2 #define JET_bitCopySnapshot 0x00000002 /* bit 1: normal (0) or copy (1) snapshot */ #define JET_bitContinueAfterThaw 0x00000004 /* bit 2: end on thaw (0) or wait for [truncate +] end snapshot */ #if ( JET_VERSION >= 0x0601 ) -#define JET_bitExplicitPrepare 0x00000008 /* bit 3: all instaces prepared by default (0) or no instance prepared by default (1) */ +#define JET_bitExplicitPrepare 0x00000008 /* bit 3: all instaces prepared by default (0) or no instance prepared by default (1) */ #endif // JET_VERSION >= 0x0601 /* Flags for JetOSSnapshotTruncateLog & JetOSSnapshotTruncateLogInstance */ @@ -5317,13 +5336,13 @@ typedef struct tagJET_PAGEINFO2 typedef struct tag_JET_EMITDATACTX { - unsigned long cbStruct; - unsigned long dwVersion; - unsigned __int64 qwSequenceNum; + JET_UINT32 cbStruct; + JET_UINT32 dwVersion; + JET_UINT64 qwSequenceNum; JET_GRBIT grbitOperationalFlags; JET_LOGTIME logtimeEmit; JET_LGPOS lgposLogData; - unsigned long cbLogData; + JET_UINT32 cbLogData; } JET_EMITDATACTX; // 40 bytes @@ -5332,9 +5351,9 @@ typedef struct tag_JET_EMITDATACTX typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( _In_ JET_INSTANCE instance, _In_ JET_EMITDATACTX * pEmitLogDataCtx, - _In_ void * pvLogData, - _In_ unsigned long cbLogData, - _In_ void * callbackCtx ); + _In_ JET_PVOID pvLogData, + _In_ JET_UINT32 cbLogData, + _In_ JET_PVOID callbackCtx ); #endif // JET_VERSION >= 0x0601 @@ -5497,7 +5516,7 @@ typedef JET_ERR (JET_API * 
JET_PFNEMITLOGDATA)( // Windows 10 #if ( JET_VERSION >= 0x0A00 ) -#define JET_coltypUnsignedLongLong 18 /* 8-byte unsigned integer */ +#define JET_coltypUnsignedLongLong 18 /* 8-byte unsigned integer */ #define JET_coltypMax 19 /* the number of column types */ /* used for validity tests and */ /* array declarations. */ @@ -5804,7 +5823,7 @@ typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( #define JET_ExceptionFailFast 0x0004 /* Use the Windows RaiseFailFastException API to force a crash */ // end_PubEsent - /* AssertAction / JET_paramAssertAction */ + /* AssertAction / JET_paramAssertAction */ #define JET_AssertExit 0x0000 /* Exit the application */ #define JET_AssertBreak 0x0001 /* Break to debugger */ @@ -5956,36 +5975,36 @@ typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( // BUFFER MANAGER errors // // end_PubEsent -#define wrnBFCacheMiss 200 /* ese97,esent only: page latch caused a cache miss */ -#define errBFPageNotCached -201 /* page is not cached */ -#define errBFLatchConflict -202 /* page latch conflict */ -#define errBFPageCached -203 /* page is cached */ -#define wrnBFPageFlushPending 204 /* page is currently being written */ -#define wrnBFPageFault 205 /* page latch caused a page fault */ -#define wrnBFBadLatchHint 206 /* page latch hint was incorrect */ -#define wrnBFLatchMaintConflict 207 /* page latch conflict with foreground maintenance */ -#define wrnBFIWriteIOComplete 208 /* signal a successful write IO from the async IO completion function */ - -#define errBFIPageEvicted -250 /* ese97,esent only: page evicted from the cache */ -#define errBFIPageCached -251 /* ese97,esent only: page already cached */ -#define errBFIOutOfOLPs -252 /* ese97,esent only: out of OLPs */ -#define errBFIOutOfBatchIOBuffers -253 /* out of Batch I/O (Opportune write) Buffers */ -#define errBFINoBufferAvailable -254 /* no buffer available for immediate use */ +#define wrnBFCacheMiss 200 /* ese97,esent only: page latch caused a cache miss */ +#define errBFPageNotCached -201 /* 
page is not cached */ +#define errBFLatchConflict -202 /* page latch conflict */ +#define errBFPageCached -203 /* page is cached */ +#define wrnBFPageFlushPending 204 /* page is currently being written */ +#define wrnBFPageFault 205 /* page latch caused a page fault */ +#define wrnBFBadLatchHint 206 /* page latch hint was incorrect */ +#define wrnBFLatchMaintConflict 207 /* page latch conflict with foreground maintenance */ +#define wrnBFIWriteIOComplete 208 /* signal a successful write IO from the async IO completion function */ + +#define errBFIPageEvicted -250 /* ese97,esent only: page evicted from the cache */ +#define errBFIPageCached -251 /* ese97,esent only: page already cached */ +#define errBFIOutOfOLPs -252 /* ese97,esent only: out of OLPs */ +#define errBFIOutOfBatchIOBuffers -253 /* out of Batch I/O (Opportune write) Buffers */ +#define errBFINoBufferAvailable -254 /* no buffer available for immediate use */ // begin_PubEsent #define JET_errDatabaseBufferDependenciesCorrupted -255 /* Buffer dependencies improperly set. 
Recovery failure */ // end_PubEsent -#define errBFIRemainingDependencies -256 /* dependencies remain on this buffer */ -#define errBFIPageFlushPending -257 /* page is currently being written */ +#define errBFIRemainingDependencies -256 /* dependencies remain on this buffer */ +#define errBFIPageFlushPending -257 /* page is currently being written */ #define errBFIPageDirty -258 /* the page could not be evicted from the cache because it or its versions were not clean enough */ -#define errBFIPageFlushed -259 /* page write initiated */ -#define errBFIPageFaultPending -260 /* page is currently being read */ -#define errBFIPageNotVerified -261 /* page data has not been verified */ -#define errBFIDependentPurged -262 /* page cannot be flushed due to purged dependencies */ -#define errBFIPageFlushDisallowedOnIOThread -263 /* the page couldn't be written because ErrBFIFlushPage is being called from the I/O thread */ +#define errBFIPageFlushed -259 /* page write initiated */ +#define errBFIPageFaultPending -260 /* page is currently being read */ +#define errBFIPageNotVerified -261 /* page data has not been verified */ +#define errBFIDependentPurged -262 /* page cannot be flushed due to purged dependencies */ +#define errBFIPageFlushDisallowedOnIOThread -263 /* the page couldn't be written because ErrBFIFlushPage is being called from the I/O thread */ #define errBFIPageTouchTooRecent -264 /* the page could not be flushed because a recent page touch would offend the waypoint */ #define errBFICheckpointWorkRemaining -266 /* checkpoint depth maintenance is not finished due to page flushes or dependency flushes remaining */ #define errBFIPageRemapNotReVerified -267 /* page is remapped after a write, which means it needs to be reverified */ -#define errBFIReqSyncFlushMapWriteFailed -268 /* UNUSED: required synchronous write to the flush map failed */ +#define errBFIReqSyncFlushMapWriteFailed -268 /* UNUSED: required synchronous write to the flush map failed */ #define 
errBFIPageFlushPendingHungIO -269 /* page is currently being written and the write I/O is hung */ #define errBFIPageFaultPendingHungIO -270 /* page is currently being read and the read I/O is hung */ #define errBFIPageFlushPendingSlowIO -271 /* page is currently being written and the write I/O is slow */ @@ -6463,7 +6482,7 @@ typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( // begin_PubEsent #define JET_errDatabaseNotReady -1230 /* Recovery on this database has not yet completed enough to permit access. */ -#define JET_errDatabaseAttachedForRecovery -1231 /* Database is attached but only for recovery. It must be explicitly attached before it can be opened. */ +#define JET_errDatabaseAttachedForRecovery -1231 /* Database is attached but only for recovery. It must be explicitly attached before it can be opened. */ #define JET_errTransactionsNotReadyDuringRecovery -1232 /* Recovery has not seen any Begin0/Commit0 records and so does not know what trxBegin0 to assign to this transaction */ @@ -6935,10 +6954,10 @@ JetCreateInstance2W( JET_ERR JET_API JetGetInstanceMiscInfo( - _In_ JET_INSTANCE instance, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_INSTANCE instance, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7038,7 +7057,7 @@ JET_ERR JET_API JetSetSystemParameterA( _Inout_opt_ JET_INSTANCE * pinstance, _In_opt_ JET_SESID sesid, - _In_ unsigned long paramid, + _In_ JET_UINT32 paramid, _In_opt_ JET_API_PTR lParam, _In_opt_ JET_PCSTR szParam ); @@ -7054,7 +7073,7 @@ JET_ERR JET_API JetSetSystemParameterW( _Inout_opt_ JET_INSTANCE * pinstance, _In_opt_ JET_SESID sesid, - _In_ unsigned long paramid, + _In_ JET_UINT32 paramid, _In_opt_ JET_API_PTR lParam, _In_opt_ JET_PCWSTR szParam ); @@ -7079,10 +7098,10 @@ JET_ERR JET_API 
JetGetSystemParameterA( _In_ JET_INSTANCE instance, _In_opt_ JET_SESID sesid, - _In_ unsigned long paramid, + _In_ JET_UINT32 paramid, _Out_opt_ JET_API_PTR * plParam, _Out_writes_bytes_opt_( cbMax ) JET_PSTR szParam, - _In_ unsigned long cbMax ); + _In_ JET_UINT32 cbMax ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7096,10 +7115,10 @@ JET_ERR JET_API JetGetSystemParameterW( _In_ JET_INSTANCE instance, _In_opt_ JET_SESID sesid, - _In_ unsigned long paramid, + _In_ JET_UINT32 paramid, _Out_opt_ JET_API_PTR * plParam, _Out_writes_bytes_opt_( cbMax ) JET_PWSTR szParam, - _In_ unsigned long cbMax ); + _In_ JET_UINT32 cbMax ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7120,17 +7139,17 @@ JetGetSystemParameterW( JET_ERR JET_API JetSetResourceParam( - _In_ JET_INSTANCE instance, - _In_ JET_RESOPER resoper, - _In_ JET_RESID resid, - _In_ JET_API_PTR ulParam ); + _In_ JET_INSTANCE instance, + _In_ JET_RESOPER resoper, + _In_ JET_RESID resid, + _In_ JET_API_PTR ulParam ); JET_ERR JET_API JetGetResourceParam( - _In_ JET_INSTANCE instance, - _In_ JET_RESOPER resoper, - _In_ JET_RESID resid, - _Out_ JET_API_PTR* pulParam ); + _In_ JET_INSTANCE instance, + _In_ JET_RESOPER resoper, + _In_ JET_RESID resid, + _Out_ JET_API_PTR * pulParam ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -7150,8 +7169,8 @@ JetGetResourceParam( JET_ERR JET_API JetEnableMultiInstanceA( _In_reads_opt_( csetsysparam ) JET_SETSYSPARAM_A * psetsysparam, - _In_ unsigned long csetsysparam, - _Out_opt_ unsigned long * pcsetsucceed ); + _In_ JET_UINT32 csetsysparam, + _Out_opt_ JET_UINT32 * pcsetsucceed ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7164,8 +7183,8 @@ JetEnableMultiInstanceA( JET_ERR JET_API JetEnableMultiInstanceW( _In_reads_opt_( csetsysparam ) 
JET_SETSYSPARAM_W * psetsysparam, - _In_ unsigned long csetsysparam, - _Out_opt_ unsigned long * pcsetsucceed ); + _In_ JET_UINT32 csetsysparam, + _Out_opt_ JET_UINT32 * pcsetsucceed ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7186,14 +7205,14 @@ JetEnableMultiInstanceW( JET_ERR JET_API JetResetCounter( - _In_ JET_SESID sesid, - _In_ long CounterType ); + _In_ JET_SESID sesid, + _In_ JET_INT32 CounterType ); JET_ERR JET_API JetGetCounter( - _In_ JET_SESID sesid, - _In_ long CounterType, - _Out_ long * plValue ); + _In_ JET_SESID sesid, + _In_ JET_INT32 CounterType, + _Out_ JET_INT32 * plValue ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -7207,8 +7226,8 @@ JetGetCounter( JET_ERR JET_API JetGetThreadStats( - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax ); + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7282,10 +7301,10 @@ JetEndSession( JET_ERR JET_API JetGetSessionInfo( - _In_ JET_SESID sesid, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ const unsigned long cbMax, - _In_ const unsigned long ulInfoLevel ); + _In_ JET_SESID sesid, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ const JET_UINT32 cbMax, + _In_ const JET_UINT32 ulInfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -7300,7 +7319,7 @@ JetGetSessionInfo( JET_ERR JET_API JetGetVersion( _In_ JET_SESID sesid, - _Out_ unsigned long * pwVersion ); + _Out_ JET_UINT32 * pwVersion ); JET_ERR JET_API JetIdle( @@ -7425,7 +7444,7 @@ JET_ERR JET_API JetCreateDatabase2A( _In_ JET_SESID sesid, _In_ JET_PCSTR szFilename, - _In_ const unsigned long cpgDatabaseSizeMax, + _In_ const JET_UINT32 cpgDatabaseSizeMax, _Out_ JET_DBID * pdbid, _In_ JET_GRBIT grbit ); @@ -7440,7 +7459,7 @@ 
JetCreateDatabase2A( JET_ERR JET_API JetCreateDatabase2W( _In_ JET_SESID sesid, _In_ JET_PCWSTR szFilename, - _In_ const unsigned long cpgDatabaseSizeMax, + _In_ const JET_UINT32 cpgDatabaseSizeMax, _Out_ JET_DBID * pdbid, _In_ JET_GRBIT grbit ); @@ -7466,7 +7485,7 @@ JET_ERR JET_API JetCreateDatabase3A( _In_ JET_PCSTR szFilename, _Out_ JET_DBID * pdbid, _In_reads_opt_( csetdbparam ) JET_SETDBPARAM * rgsetdbparam, - _In_ unsigned long csetdbparam, + _In_ JET_UINT32 csetdbparam, _In_ JET_GRBIT grbit ); JET_ERR JET_API JetCreateDatabase3W( @@ -7474,7 +7493,7 @@ JET_ERR JET_API JetCreateDatabase3W( _In_ JET_PCWSTR szFilename, _Out_ JET_DBID * pdbid, _In_reads_opt_( csetdbparam ) JET_SETDBPARAM * rgsetdbparam, - _In_ unsigned long csetdbparam, + _In_ JET_UINT32 csetdbparam, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -7537,7 +7556,7 @@ JET_ERR JET_API JetAttachDatabase2A( _In_ JET_SESID sesid, _In_ JET_PCSTR szFilename, - _In_ const unsigned long cpgDatabaseSizeMax, + _In_ const JET_UINT32 cpgDatabaseSizeMax, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -7552,7 +7571,7 @@ JET_ERR JET_API JetAttachDatabase2W( _In_ JET_SESID sesid, _In_ JET_PCWSTR szFilename, - _In_ const unsigned long cpgDatabaseSizeMax, + _In_ const JET_UINT32 cpgDatabaseSizeMax, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -7577,7 +7596,7 @@ JetAttachDatabase3A( _In_ JET_SESID sesid, _In_ JET_PCSTR szFilename, _In_reads_opt_( csetdbparam ) JET_SETDBPARAM * rgsetdbparam, - _In_ unsigned long csetdbparam, + _In_ JET_UINT32 csetdbparam, _In_ JET_GRBIT grbit ); JET_ERR JET_API @@ -7585,7 +7604,7 @@ JetAttachDatabase3W( _In_ JET_SESID sesid, _In_ JET_PCWSTR szFilename, _In_reads_opt_( csetdbparam ) JET_SETDBPARAM * rgsetdbparam, - _In_ unsigned long csetdbparam, + _In_ JET_UINT32 csetdbparam, 
_In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -7685,14 +7704,14 @@ JetDetachDatabase2W( JET_ERR JET_API JetGetObjectInfoA( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_OBJTYP objtyp, - _In_opt_ JET_PCSTR szContainerName, - _In_opt_ JET_PCSTR szObjectName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_OBJTYP objtyp, + _In_opt_ JET_PCSTR szContainerName, + _In_opt_ JET_PCSTR szObjectName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7704,14 +7723,14 @@ JetGetObjectInfoA( JET_ERR JET_API JetGetObjectInfoW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_OBJTYP objtyp, - _In_opt_ JET_PCWSTR szContainerName, - _In_opt_ JET_PCWSTR szObjectName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_OBJTYP objtyp, + _In_opt_ JET_PCWSTR szContainerName, + _In_opt_ JET_PCWSTR szObjectName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7733,11 +7752,11 @@ JetGetObjectInfoW( JET_ERR JET_API JetGetTableInfoA( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma 
endregion @@ -7749,11 +7768,11 @@ JetGetTableInfoA( JET_ERR JET_API JetGetTableInfoW( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7773,20 +7792,20 @@ JetGetTableInfoW( JET_ERR JET_API JetSetTableInfoW( - _In_opt_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_bytes_opt_( cbParam ) const void * pvParam, - _In_ unsigned long cbParam, - _In_ unsigned long InfoLevel ); + _In_opt_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_bytes_opt_( cbParam ) JET_PCVOID pvParam, + _In_ JET_UINT32 cbParam, + _In_ JET_UINT32 InfoLevel ); JET_ERR JET_API JetSetTableInfoA( - _In_opt_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_bytes_opt_( cbParam ) const void * pvParam, - _In_ unsigned long cbParam, - _In_ unsigned long InfoLevel ); + _In_opt_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_bytes_opt_( cbParam ) JET_PCVOID pvParam, + _In_ JET_UINT32 cbParam, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7799,10 +7818,10 @@ JetSetTableInfoA( JET_ERR JET_API JetCreateEncryptionKey( - _In_ unsigned long encryptionAlgorithm, - _Out_writes_bytes_to_opt_( cbKey, *pcbActual ) void * pvKey, - _In_ unsigned long cbKey, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 encryptionAlgorithm, + _Out_writes_bytes_to_opt_( cbKey, *pcbActual ) JET_PVOID pvKey, + _In_ JET_UINT32 cbKey, + _Out_opt_ JET_UINT32 * pcbActual ); #endif // JET_VERSION >= 0x0A01 // begin_PubEsent @@ -7816,12 +7835,12 @@ JetCreateEncryptionKey( JET_ERR JET_API JetCreateTableA( - _In_ JET_SESID sesid, - 
_In_ JET_DBID dbid, - _In_ JET_PCSTR szTableName, - _In_ unsigned long lPages, - _In_ unsigned long lDensity, - _Out_ JET_TABLEID * ptableid ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCSTR szTableName, + _In_ JET_UINT32 lPages, + _In_ JET_UINT32 lDensity, + _Out_ JET_TABLEID * ptableid ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7833,12 +7852,12 @@ JetCreateTableA( JET_ERR JET_API JetCreateTableW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCWSTR szTableName, - _In_ unsigned long lPages, - _In_ unsigned long lDensity, - _Out_ JET_TABLEID * ptableid ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCWSTR szTableName, + _In_ JET_UINT32 lPages, + _In_ JET_UINT32 lDensity, + _Out_ JET_TABLEID * ptableid ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7859,9 +7878,9 @@ JetCreateTableW( JET_ERR JET_API JetCreateTableColumnIndexA( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _Inout_ JET_TABLECREATE_A * ptablecreate ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _Inout_ JET_TABLECREATE_A * ptablecreate ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -7873,9 +7892,9 @@ JetCreateTableColumnIndexA( JET_ERR JET_API JetCreateTableColumnIndexW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _Inout_ JET_TABLECREATE_W * ptablecreate ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _Inout_ JET_TABLECREATE_W * ptablecreate ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8127,12 +8146,12 @@ JET_ERR JET_API JetRenameTableW( JET_ERR JET_API JetGetTableColumnInfoA( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_opt_ JET_PCSTR szColumnName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + 
_In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_opt_ JET_PCSTR szColumnName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8143,12 +8162,12 @@ JetGetTableColumnInfoA( #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) JET_ERR JET_API JetGetTableColumnInfoW( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_opt_ JET_PCWSTR szColumnName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_opt_ JET_PCWSTR szColumnName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8170,13 +8189,13 @@ JET_ERR JET_API JetGetTableColumnInfoW( JET_ERR JET_API JetGetColumnInfoA( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCSTR szTableName, - _In_opt_ JET_PCSTR pColumnNameOrId, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCSTR szTableName, + _In_opt_ JET_PCSTR pColumnNameOrId, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8187,13 +8206,13 @@ JetGetColumnInfoA( #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) JET_ERR JET_API JetGetColumnInfoW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCWSTR szTableName, - _In_opt_ JET_PCWSTR pwColumnNameOrId, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); 
+ _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCWSTR szTableName, + _In_opt_ JET_PCWSTR pwColumnNameOrId, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8215,13 +8234,13 @@ JET_ERR JET_API JetGetColumnInfoW( JET_ERR JET_API JetAddColumnA( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ JET_PCSTR szColumnName, - _In_ const JET_COLUMNDEF * pcolumndef, - _In_reads_bytes_opt_( cbDefault ) const void * pvDefault, - _In_ unsigned long cbDefault, - _Out_opt_ JET_COLUMNID * pcolumnid ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_PCSTR szColumnName, + _In_ const JET_COLUMNDEF * pcolumndef, + _In_reads_bytes_opt_( cbDefault ) JET_PCVOID pvDefault, + _In_ JET_UINT32 cbDefault, + _Out_opt_ JET_COLUMNID * pcolumnid ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8232,13 +8251,13 @@ JetAddColumnA( #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) JET_ERR JET_API JetAddColumnW( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ JET_PCWSTR szColumnName, - _In_ const JET_COLUMNDEF * pcolumndef, - _In_reads_bytes_opt_( cbDefault ) const void * pvDefault, - _In_ unsigned long cbDefault, - _Out_opt_ JET_COLUMNID * pcolumnid ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_PCWSTR szColumnName, + _In_ const JET_COLUMNDEF * pcolumndef, + _In_reads_bytes_opt_( cbDefault ) JET_PCVOID pvDefault, + _In_ JET_UINT32 cbDefault, + _Out_opt_ JET_COLUMNID * pcolumnid ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8385,13 +8404,13 @@ JetRenameColumnW( JET_ERR JET_API JetSetColumnDefaultValueA( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCSTR szTableName, - _In_ JET_PCSTR szColumnName, - 
_In_reads_bytes_( cbData ) const void * pvData, - _In_ const unsigned long cbData, - _In_ const JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCSTR szTableName, + _In_ JET_PCSTR szColumnName, + _In_reads_bytes_( cbData ) JET_PCVOID pvData, + _In_ const JET_UINT32 cbData, + _In_ const JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8403,13 +8422,13 @@ JetSetColumnDefaultValueA( JET_ERR JET_API JetSetColumnDefaultValueW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCWSTR szTableName, - _In_ JET_PCWSTR szColumnName, - _In_reads_bytes_( cbData ) const void * pvData, - _In_ const unsigned long cbData, - _In_ const JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCWSTR szTableName, + _In_ JET_PCWSTR szColumnName, + _In_reads_bytes_( cbData ) JET_PCVOID pvData, + _In_ const JET_UINT32 cbData, + _In_ const JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8431,12 +8450,12 @@ JetSetColumnDefaultValueW( JET_ERR JET_API JetGetTableIndexInfoA( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_opt_ JET_PCSTR szIndexName, - _Out_writes_bytes_( cbResult ) void * pvResult, - _In_ unsigned long cbResult, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_opt_ JET_PCSTR szIndexName, + _Out_writes_bytes_( cbResult ) JET_PVOID pvResult, + _In_ JET_UINT32 cbResult, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8448,12 +8467,12 @@ JetGetTableIndexInfoA( JET_ERR JET_API JetGetTableIndexInfoW( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_opt_ JET_PCWSTR szIndexName, - _Out_writes_bytes_( cbResult ) void * pvResult, - _In_ unsigned long cbResult, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID 
sesid, + _In_ JET_TABLEID tableid, + _In_opt_ JET_PCWSTR szIndexName, + _Out_writes_bytes_( cbResult ) JET_PVOID pvResult, + _In_ JET_UINT32 cbResult, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8475,13 +8494,13 @@ JetGetTableIndexInfoW( JET_ERR JET_API JetGetIndexInfoA( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCSTR szTableName, - _In_opt_ JET_PCSTR szIndexName, - _Out_writes_bytes_( cbResult ) void * pvResult, - _In_ unsigned long cbResult, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCSTR szTableName, + _In_opt_ JET_PCSTR szIndexName, + _Out_writes_bytes_( cbResult ) JET_PVOID pvResult, + _In_ JET_UINT32 cbResult, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8493,13 +8512,13 @@ JetGetIndexInfoA( JET_ERR JET_API JetGetIndexInfoW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_PCWSTR szTableName, - _In_opt_ JET_PCWSTR szIndexName, - _Out_writes_bytes_( cbResult ) void * pvResult, - _In_ unsigned long cbResult, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_PCWSTR szTableName, + _In_opt_ JET_PCWSTR szIndexName, + _Out_writes_bytes_( cbResult ) JET_PVOID pvResult, + _In_ JET_UINT32 cbResult, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8525,9 +8544,9 @@ JetCreateIndexA( _In_ JET_TABLEID tableid, _In_ JET_PCSTR szIndexName, _In_ JET_GRBIT grbit, - _In_reads_bytes_( cbKey ) const char * szKey, - _In_ unsigned long cbKey, - _In_ unsigned long lDensity ); + _In_reads_bytes_( cbKey ) JET_PCSTR szKey, + _In_ JET_UINT32 cbKey, + _In_ JET_UINT32 lDensity ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ 
-8539,13 +8558,13 @@ JetCreateIndexA( JET_ERR JET_API JetCreateIndexW( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ JET_PCWSTR szIndexName, - _In_ JET_GRBIT grbit, - _In_reads_bytes_( cbKey ) const WCHAR * szKey, - _In_ unsigned long cbKey, - _In_ unsigned long lDensity ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_PCWSTR szIndexName, + _In_ JET_GRBIT grbit, + _In_reads_bytes_( cbKey ) JET_PCWSTR szKey, + _In_ JET_UINT32 cbKey, + _In_ JET_UINT32 lDensity ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8570,7 +8589,7 @@ JetCreateIndex2A( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _In_reads_( cIndexCreate ) JET_INDEXCREATE_A * pindexcreate, - _In_ unsigned long cIndexCreate ); + _In_ JET_UINT32 cIndexCreate ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8585,7 +8604,7 @@ JetCreateIndex2W( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _In_reads_( cIndexCreate ) JET_INDEXCREATE_W * pindexcreate, - _In_ unsigned long cIndexCreate ); + _In_ JET_UINT32 cIndexCreate ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8604,17 +8623,17 @@ JetCreateIndex2W( JET_ERR JET_API JetCreateIndex3A( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_( cIndexCreate ) JET_INDEXCREATE2_A *pindexcreate, - _In_ unsigned long cIndexCreate ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_( cIndexCreate ) JET_INDEXCREATE2_A * pindexcreate, + _In_ JET_UINT32 cIndexCreate ); JET_ERR JET_API JetCreateIndex3W( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_( cIndexCreate ) JET_INDEXCREATE2_W *pindexcreate, - _In_ unsigned long cIndexCreate ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_( cIndexCreate ) JET_INDEXCREATE2_W * pindexcreate, + _In_ JET_UINT32 cIndexCreate ); #endif /* 
WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8634,10 +8653,10 @@ JetCreateIndex3W( JET_ERR JET_API JetCreateIndex4A( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_( cIndexCreate ) JET_INDEXCREATE3_A *pindexcreate, - _In_ unsigned long cIndexCreate ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_( cIndexCreate ) JET_INDEXCREATE3_A * pindexcreate, + _In_ JET_UINT32 cIndexCreate ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8647,10 +8666,10 @@ JetCreateIndex4A( JET_ERR JET_API JetCreateIndex4W( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_( cIndexCreate ) JET_INDEXCREATE3_W *pindexcreate, - _In_ unsigned long cIndexCreate ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_( cIndexCreate ) JET_INDEXCREATE3_W * pindexcreate, + _In_ JET_UINT32 cIndexCreate ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8723,7 +8742,7 @@ JetBeginTransaction2( JET_ERR JET_API JetBeginTransaction3( _In_ JET_SESID sesid, - _In_ __int64 trxid, + _In_ JET_INT64 trxid, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -8738,10 +8757,10 @@ JetBeginTransaction3( JET_ERR JET_API JetPrepareToCommitTransaction( - _In_ JET_SESID sesid, - _In_reads_bytes_( cbData ) const void * pvData, - _In_ unsigned long cbData, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_reads_bytes_( cbData ) JET_PCVOID pvData, + _In_ JET_UINT32 cbData, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -8761,7 +8780,7 @@ JET_ERR JET_API JetCommitTransaction2( _In_ JET_SESID sesid, _In_ JET_GRBIT grbit, - _In_ unsigned long cmsecDurableCommit, + _In_ JET_UINT32 cmsecDurableCommit, _Out_opt_ JET_COMMIT_ID * pCommitId ); #endif // 
JET_VERSION >= 0x0602 @@ -8781,11 +8800,11 @@ JetRollback( #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) JET_ERR JET_API JetGetDatabaseInfoA( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8797,11 +8816,11 @@ JET_ERR JET_API JetGetDatabaseInfoA( JET_ERR JET_API JetGetDatabaseInfoW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8823,10 +8842,10 @@ JetGetDatabaseInfoW( JET_ERR JET_API JetGetDatabaseFileInfoA( - _In_ JET_PCSTR szDatabaseName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_PCSTR szDatabaseName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8838,10 +8857,10 @@ JetGetDatabaseFileInfoA( JET_ERR JET_API JetGetDatabaseFileInfoW( - _In_ JET_PCWSTR szDatabaseName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_PCWSTR szDatabaseName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #endif /* 
WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -8861,17 +8880,17 @@ JetGetDatabaseFileInfoW( JET_ERR JET_API JetGetLogFileInfoA( - _In_ JET_PCSTR szLog, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ const unsigned long cbMax, - _In_ const unsigned long InfoLevel ); + _In_ JET_PCSTR szLog, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ const JET_UINT32 cbMax, + _In_ const JET_UINT32 InfoLevel ); JET_ERR JET_API JetGetLogFileInfoW( - _In_ JET_PCWSTR szLog, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ const unsigned long cbMax, - _In_ const unsigned long InfoLevel ); + _In_ JET_PCWSTR szLog, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ const JET_UINT32 cbMax, + _In_ const JET_UINT32 InfoLevel ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -8896,7 +8915,7 @@ JetOpenDatabaseA( _In_ JET_SESID sesid, _In_ JET_PCSTR szFilename, _In_opt_ JET_PCSTR szConnect, - _Out_ JET_DBID* pdbid, + _Out_ JET_DBID * pdbid, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -8912,7 +8931,7 @@ JetOpenDatabaseW( _In_ JET_SESID sesid, _In_ JET_PCWSTR szFilename, _In_opt_ JET_PCWSTR szConnect, - _Out_ JET_DBID* pdbid, + _Out_ JET_DBID * pdbid, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -8949,8 +8968,8 @@ JetOpenTableA( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_ JET_PCSTR szTableName, - _In_reads_bytes_opt_( cbParameters ) const void * pvParameters, - _In_ unsigned long cbParameters, + _In_reads_bytes_opt_( cbParameters ) JET_PCVOID pvParameters, + _In_ JET_UINT32 cbParameters, _In_ JET_GRBIT grbit, _Out_ JET_TABLEID * ptableid ); @@ -8967,8 +8986,8 @@ JetOpenTableW( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_ JET_PCWSTR szTableName, - _In_reads_bytes_opt_( cbParameters ) const void * pvParameters, - _In_ unsigned long 
cbParameters, + _In_reads_bytes_opt_( cbParameters ) JET_PCVOID pvParameters, + _In_ JET_UINT32 cbParameters, _In_ JET_GRBIT grbit, _Out_ JET_TABLEID * ptableid ); @@ -9026,11 +9045,11 @@ JetDelete( JET_ERR JET_API JetUpdate( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_to_opt_( cbBookmark, *pcbActual ) void * pvBookmark, - _In_ unsigned long cbBookmark, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_to_opt_( cbBookmark, *pcbActual ) JET_PVOID pvBookmark, + _In_ JET_UINT32 cbBookmark, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9042,12 +9061,12 @@ JetUpdate( JET_ERR JET_API JetUpdate2( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_to_opt_( cbBookmark, *pcbActual ) void * pvBookmark, - _In_ unsigned long cbBookmark, - _Out_opt_ unsigned long * pcbActual, - _In_ const JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_to_opt_( cbBookmark, *pcbActual ) JET_PVOID pvBookmark, + _In_ JET_UINT32 cbBookmark, + _Out_opt_ JET_UINT32 * pcbActual, + _In_ const JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9059,33 +9078,33 @@ JetUpdate2( JET_ERR JET_API JetEscrowUpdate( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ JET_COLUMNID columnid, - _In_reads_bytes_( cbMax ) void * pv, - _In_ unsigned long cbMax, - _Out_writes_bytes_to_opt_( cbOldMax, *pcbOldActual ) void * pvOld, - _In_ unsigned long cbOldMax, - _Out_opt_ unsigned long * pcbOldActual, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_COLUMNID columnid, + _In_reads_bytes_( cbMax ) JET_PVOID pv, + _In_ JET_UINT32 cbMax, + _Out_writes_bytes_to_opt_( cbOldMax, *pcbOldActual ) JET_PVOID pvOld, + _In_ JET_UINT32 cbOldMax, + 
_Out_opt_ JET_UINT32 * pcbOldActual, + _In_ JET_GRBIT grbit ); JET_ERR JET_API JetRetrieveColumn( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ JET_COLUMNID columnid, - _Out_writes_bytes_to_opt_( cbData, min( cbData, *pcbActual ) ) void * pvData, - _In_ unsigned long cbData, - _Out_opt_ unsigned long * pcbActual, - _In_ JET_GRBIT grbit, - _Inout_opt_ JET_RETINFO * pretinfo ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_COLUMNID columnid, + _Out_writes_bytes_to_opt_( cbData, min( cbData, *pcbActual ) ) JET_PVOID pvData, + _In_ JET_UINT32 cbData, + _Out_opt_ JET_UINT32 * pcbActual, + _In_ JET_GRBIT grbit, + _Inout_opt_ JET_RETINFO * pretinfo ); JET_ERR JET_API JetRetrieveColumns( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Inout_updates_opt_( cretrievecolumn ) JET_RETRIEVECOLUMN * pretrievecolumn, - _In_ unsigned long cretrievecolumn ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Inout_updates_opt_( cretrievecolumn ) JET_RETRIEVECOLUMN * pretrievecolumn, + _In_ JET_UINT32 cretrievecolumn ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9097,16 +9116,16 @@ JetRetrieveColumns( JET_ERR JET_API JetEnumerateColumns( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ unsigned long cEnumColumnId, - _In_reads_opt_( cEnumColumnId ) JET_ENUMCOLUMNID * rgEnumColumnId, - _Out_ unsigned long * pcEnumColumn, - _Outptr_result_buffer_( *pcEnumColumn ) JET_ENUMCOLUMN ** prgEnumColumn, - _In_ JET_PFNREALLOC pfnRealloc, - _In_opt_ void * pvReallocContext, - _In_ unsigned long cbDataMost, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_UINT32 cEnumColumnId, + _In_reads_opt_( cEnumColumnId ) JET_ENUMCOLUMNID * rgEnumColumnId, + _Out_ JET_UINT32 * pcEnumColumn, + _Outptr_result_buffer_( *pcEnumColumn ) JET_ENUMCOLUMN ** prgEnumColumn, + _In_ JET_PFNREALLOC pfnRealloc, + _In_opt_ JET_PVOID pvReallocContext, + _In_ JET_UINT32 
cbDataMost, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9120,13 +9139,13 @@ JetEnumerateColumns( JET_ERR JET_API JetRetrieveTaggedColumnList( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_ unsigned long * pcColumns, - _Out_writes_bytes_to_opt_( cbData, *pcColumns * sizeof( JET_RETRIEVEMULTIVALUECOUNT ) ) void * pvData, - _In_ unsigned long cbData, - _In_ JET_COLUMNID columnidStart, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_ JET_UINT32 * pcColumns, + _Out_writes_bytes_to_opt_( cbData, *pcColumns * sizeof( JET_RETRIEVEMULTIVALUECOUNT ) ) JET_PVOID pvData, + _In_ JET_UINT32 cbData, + _In_ JET_COLUMNID columnidStart, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -9174,7 +9193,7 @@ JET_ERR JET_API JetGetRecordSize3( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _Inout_ JET_RECSIZE3 * precsize, + _Inout_ JET_RECSIZE3 * precsize, _In_ const JET_GRBIT grbit ); #endif // JET_VERSION >= 0x0A01 @@ -9186,33 +9205,33 @@ JetGetRecordSize3( JET_ERR JET_API JetSetColumn( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_ JET_COLUMNID columnid, - _In_reads_bytes_opt_( cbData ) const void * pvData, - _In_ unsigned long cbData, - _In_ JET_GRBIT grbit, - _In_opt_ JET_SETINFO * psetinfo ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_ JET_COLUMNID columnid, + _In_reads_bytes_opt_( cbData ) JET_PCVOID pvData, + _In_ JET_UINT32 cbData, + _In_ JET_GRBIT grbit, + _In_opt_ JET_SETINFO * psetinfo ); JET_ERR JET_API JetSetColumns( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _In_reads_opt_( csetcolumn ) JET_SETCOLUMN * psetcolumn, - _In_ unsigned long csetcolumn ); + _In_ JET_UINT32 csetcolumn ); JET_ERR JET_API JetPrepareUpdate( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _In_ unsigned long prep ); + _In_ JET_UINT32 prep ); JET_ERR JET_API 
JetGetRecordPosition( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_( cbRecpos ) JET_RECPOS * precpos, - _In_ unsigned long cbRecpos ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_( cbRecpos ) JET_RECPOS * precpos, + _In_ JET_UINT32 cbRecpos ); JET_ERR JET_API JetGotoPosition( @@ -9228,18 +9247,18 @@ JetGotoPosition( JET_ERR JET_API JetGetCursorInfo( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); JET_ERR JET_API JetDupCursor( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_ JET_TABLEID * ptableid, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_ JET_TABLEID * ptableid, + _In_ JET_GRBIT grbit ); #if ( JET_VERSION < 0x0600 ) #define JetGetCurrentIndexA JetGetCurrentIndex @@ -9250,7 +9269,7 @@ JetGetCurrentIndexA( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _Out_writes_bytes_( cbIndexName ) JET_PSTR szIndexName, - _In_ unsigned long cbIndexName ); + _In_ JET_UINT32 cbIndexName ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9265,7 +9284,7 @@ JetGetCurrentIndexW( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _Out_writes_bytes_( cbIndexName ) JET_PWSTR szIndexName, - _In_ unsigned long cbIndexName ); + _In_ JET_UINT32 cbIndexName ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9368,7 +9387,7 @@ JetSetCurrentIndex3A( _In_ JET_TABLEID tableid, _In_opt_ JET_PCSTR szIndexName, _In_ JET_GRBIT grbit, - _In_ unsigned long itagSequence ); + _In_ JET_UINT32 itagSequence ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma 
endregion @@ -9384,7 +9403,7 @@ JetSetCurrentIndex3W( _In_ JET_TABLEID tableid, _In_opt_ JET_PCWSTR szIndexName, _In_ JET_GRBIT grbit, - _In_ unsigned long itagSequence ); + _In_ JET_UINT32 itagSequence ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9411,7 +9430,7 @@ JetSetCurrentIndex4A( _In_opt_ JET_PCSTR szIndexName, _In_opt_ JET_INDEXID * pindexid, _In_ JET_GRBIT grbit, - _In_ unsigned long itagSequence ); + _In_ JET_UINT32 itagSequence ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9428,7 +9447,7 @@ JetSetCurrentIndex4W( _In_opt_ JET_PCWSTR szIndexName, _In_opt_ JET_INDEXID * pindexid, _In_ JET_GRBIT grbit, - _In_ unsigned long itagSequence ); + _In_ JET_UINT32 itagSequence ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9448,17 +9467,17 @@ JET_ERR JET_API JetMove( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _In_ long cRow, + _In_ JET_INT32 cRow, _In_ JET_GRBIT grbit ); #if ( JET_VERSION >= 0x0602 ) JET_ERR JET_API JetSetCursorFilter( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_( cColumnFilters ) JET_INDEX_COLUMN *rgColumnFilters, - _In_ unsigned long cColumnFilters, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_( cColumnFilters ) JET_INDEX_COLUMN * rgColumnFilters, + _In_ JET_UINT32 cColumnFilters, + _In_ JET_GRBIT grbit ); #endif // JET_VERSION >= 0x0602 #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -9481,11 +9500,11 @@ JetGetLock( JET_ERR JET_API JetMakeKey( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_bytes_opt_( cbData ) const void * pvData, - _In_ unsigned long cbData, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_bytes_opt_( cbData ) JET_PCVOID pvData, + _In_ JET_UINT32 cbData, 
+ _In_ JET_GRBIT grbit ); JET_ERR JET_API JetSeek( @@ -9501,14 +9520,15 @@ JetSeek( #pragma region Application Family or Esent Package #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) +// FOOBY JET_ERR JET_API JetPrereadKeys( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _In_reads_(ckeys) const void ** rgpvKeys, - _In_reads_(ckeys) const unsigned long * rgcbKeys, - _In_ long ckeys, - _Out_opt_ long * pckeysPreread, + _In_reads_(ckeys) JET_PCVOID * rgpvKeys, + _In_reads_(ckeys) const JET_UINT32 * rgcbKeys, + _In_ JET_INT32 ckeys, + _Out_opt_ JET_INT32 * pckeysPreread, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -9526,21 +9546,21 @@ JetPrereadIndexRanges( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _In_reads_(cIndexRanges) const JET_INDEX_RANGE * const rgIndexRanges, - _In_ const unsigned long cIndexRanges, - _Out_opt_ unsigned long * const pcRangesPreread, + _In_ const JET_UINT32 cIndexRanges, + _Out_opt_ JET_UINT32 * const pcRangesPreread, _In_reads_(ccolumnidPreread) const JET_COLUMNID * const rgcolumnidPreread, - _In_ const unsigned long ccolumnidPreread, + _In_ const JET_UINT32 ccolumnidPreread, _In_ JET_GRBIT grbit ); // JET_bitPrereadForward, JET_bitPrereadBackward #endif // JET_VERSION >= 0x0602 JET_ERR JET_API JetGetBookmark( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pvBookmark, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pvBookmark, + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9552,15 +9572,15 @@ JetGetBookmark( JET_ERR JET_API JetGetSecondaryIndexBookmark( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - 
_Out_writes_bytes_to_opt_( cbSecondaryKeyMax, *pcbSecondaryKeyActual ) void * pvSecondaryKey, - _In_ unsigned long cbSecondaryKeyMax, - _Out_opt_ unsigned long * pcbSecondaryKeyActual, - _Out_writes_bytes_to_opt_( cbPrimaryBookmarkMax, *pcbPrimaryBookmarkActual ) void * pvPrimaryBookmark, - _In_ unsigned long cbPrimaryBookmarkMax, - _Out_opt_ unsigned long * pcbPrimaryBookmarkActual, - _In_ const JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_to_opt_( cbSecondaryKeyMax, *pcbSecondaryKeyActual ) JET_PVOID pvSecondaryKey, + _In_ JET_UINT32 cbSecondaryKeyMax, + _Out_opt_ JET_UINT32 * pcbSecondaryKeyActual, + _Out_writes_bytes_to_opt_( cbPrimaryBookmarkMax, *pcbPrimaryBookmarkActual ) JET_PVOID pvPrimaryBookmark, + _In_ JET_UINT32 cbPrimaryBookmarkMax, + _Out_opt_ JET_UINT32 * pcbPrimaryBookmarkActual, + _In_ const JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9624,8 +9644,8 @@ JetDefragmentA( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_opt_ JET_PCSTR szTableName, - _Inout_opt_ unsigned long * pcPasses, - _Inout_opt_ unsigned long * pcSeconds, + _Inout_opt_ JET_UINT32 * pcPasses, + _Inout_opt_ JET_UINT32 * pcSeconds, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -9641,8 +9661,8 @@ JetDefragmentW( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_opt_ JET_PCWSTR szTableName, - _Inout_opt_ unsigned long * pcPasses, - _Inout_opt_ unsigned long * pcSeconds, + _Inout_opt_ JET_UINT32 * pcPasses, + _Inout_opt_ JET_UINT32 * pcSeconds, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -9669,8 +9689,8 @@ JetDefragment2A( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_opt_ JET_PCSTR szTableName, - _Inout_opt_ unsigned long * pcPasses, - _Inout_opt_ unsigned long * pcSeconds, + _Inout_opt_ JET_UINT32 * pcPasses, + 
_Inout_opt_ JET_UINT32 * pcSeconds, _In_ JET_CALLBACK callback, _In_ JET_GRBIT grbit ); @@ -9687,8 +9707,8 @@ JetDefragment2W( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_opt_ JET_PCWSTR szTableName, - _Inout_opt_ unsigned long * pcPasses, - _Inout_opt_ unsigned long * pcSeconds, + _Inout_opt_ JET_UINT32 * pcPasses, + _Inout_opt_ JET_UINT32 * pcSeconds, _In_ JET_CALLBACK callback, _In_ JET_GRBIT grbit ); @@ -9715,10 +9735,10 @@ JetDefragment3A( _In_ JET_SESID sesid, _In_ JET_PCSTR szDatabaseName, _In_opt_ JET_PCSTR szTableName, - _Inout_opt_ unsigned long * pcPasses, - _Inout_opt_ unsigned long * pcSeconds, + _Inout_opt_ JET_UINT32 * pcPasses, + _Inout_opt_ JET_UINT32 * pcSeconds, _In_ JET_CALLBACK callback, - _In_ void * pvContext, + _In_ JET_PVOID pvContext, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -9734,10 +9754,10 @@ JetDefragment3W( _In_ JET_SESID sesid, _In_ JET_PCWSTR szDatabaseName, _In_opt_ JET_PCWSTR szTableName, - _Inout_opt_ unsigned long * pcPasses, - _Inout_opt_ unsigned long * pcSeconds, + _Inout_opt_ JET_UINT32 * pcPasses, + _Inout_opt_ JET_UINT32 * pcSeconds, _In_ JET_CALLBACK callback, - _In_ void * pvContext, + _In_ JET_PVOID pvContext, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -9761,12 +9781,12 @@ JetDefragment3W( JET_ERR JET_API JetDatabaseScan( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _Inout_opt_ unsigned long * pcSecondsMax, - _In_ unsigned long cmsecSleep, - _In_ JET_CALLBACK pfnCallback, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _Inout_opt_ JET_UINT32 * pcSecondsMax, + _In_ JET_UINT32 cmsecSleep, + _In_ JET_CALLBACK pfnCallback, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -9782,11 +9802,11 @@ JetDatabaseScan( JET_ERR JET_API JetConvertDDLA( - _In_ JET_SESID sesid, - _In_ JET_DBID 
dbid, - _In_ JET_OPDDLCONV convtyp, - _Out_writes_bytes_( cbData ) void * pvData, - _In_ unsigned long cbData ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_OPDDLCONV convtyp, + _Out_writes_bytes_( cbData ) JET_PVOID pvData, + _In_ JET_UINT32 cbData ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -9798,11 +9818,11 @@ JetConvertDDLA( JET_ERR JET_API JetConvertDDLW( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ JET_OPDDLCONV convtyp, - _Out_writes_bytes_( cbData ) void * pvData, - _In_ unsigned long cbData ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_OPDDLCONV convtyp, + _Out_writes_bytes_( cbData ) JET_PVOID pvData, + _In_ JET_UINT32 cbData ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -9866,14 +9886,14 @@ JET_ERR JET_API JetSetMaxDatabaseSize( _In_ JET_SESID sesid, _In_ JET_DBID dbid, - _In_ unsigned long cpg, + _In_ JET_UINT32 cpg, _In_ JET_GRBIT grbit ); JET_ERR JET_API JetGetMaxDatabaseSize( _In_ JET_SESID sesid, _In_ JET_DBID dbid, - _Out_ unsigned long * pcpg, + _Out_ JET_UINT32 * pcpg, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ @@ -9893,8 +9913,8 @@ JET_ERR JET_API JetSetDatabaseSizeA( _In_ JET_SESID sesid, _In_ JET_PCSTR szDatabaseName, - _In_ unsigned long cpg, - _Out_ unsigned long * pcpgReal ); + _In_ JET_UINT32 cpg, + _Out_ JET_UINT32 * pcpgReal ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9908,8 +9928,8 @@ JET_ERR JET_API JetSetDatabaseSizeW( _In_ JET_SESID sesid, _In_ JET_PCWSTR szDatabaseName, - _In_ unsigned long cpg, - _Out_ unsigned long * pcpgReal ); + _In_ JET_UINT32 cpg, + _Out_ JET_UINT32 * pcpgReal ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9928,8 +9948,8 @@ JET_ERR JET_API JetGrowDatabase( _In_ JET_SESID sesid, _In_ JET_DBID dbid, - _In_ 
unsigned long cpg, - _In_ unsigned long * pcpgReal ); + _In_ JET_UINT32 cpg, + _In_ JET_UINT32 * pcpgReal ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9942,8 +9962,8 @@ JET_ERR JET_API JetResizeDatabase( _In_ JET_SESID sesid, _In_ JET_DBID dbid, - _In_ unsigned long cpgTarget, - _Out_ unsigned long * pcpgActual, + _In_ JET_UINT32 cpgTarget, + _Out_ JET_UINT32 * pcpgActual, _In_ const JET_GRBIT grbit ); #endif // JET_VERSION >= 0x0602 @@ -9965,10 +9985,10 @@ JetResetSessionContext( #if ( JET_VERSION < 0x0600 ) #define JetDBUtilitiesA JetDBUtilities #endif -JET_ERR JET_API JetDBUtilitiesA( JET_DBUTIL_A *pdbutil ); +JET_ERR JET_API JetDBUtilitiesA( JET_DBUTIL_A * pdbutil ); #if ( JET_VERSION >= 0x0600 ) -JET_ERR JET_API JetDBUtilitiesW( JET_DBUTIL_W *pdbutil ); +JET_ERR JET_API JetDBUtilitiesW( JET_DBUTIL_W * pdbutil ); #ifdef JET_UNICODE #define JetDBUtilities JetDBUtilitiesW #else @@ -9984,10 +10004,10 @@ JET_ERR JET_API JetDBUtilitiesW( JET_DBUTIL_W *pdbutil ); JET_ERR JET_API JetGotoBookmark( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_bytes_( cbBookmark ) void * pvBookmark, - _In_ unsigned long cbBookmark ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_bytes_( cbBookmark ) JET_PVOID pvBookmark, + _In_ JET_UINT32 cbBookmark ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -9999,13 +10019,13 @@ JetGotoBookmark( JET_ERR JET_API JetGotoSecondaryIndexBookmark( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _In_reads_bytes_( cbSecondaryKey ) void * pvSecondaryKey, - _In_ unsigned long cbSecondaryKey, - _In_reads_bytes_opt_( cbPrimaryBookmark ) void * pvPrimaryBookmark, - _In_ unsigned long cbPrimaryBookmark, - _In_ const JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _In_reads_bytes_( cbSecondaryKey ) JET_PVOID pvSecondaryKey, + _In_ JET_UINT32 cbSecondaryKey, + 
_In_reads_bytes_opt_( cbPrimaryBookmark ) JET_PVOID pvPrimaryBookmark, + _In_ JET_UINT32 cbPrimaryBookmark, + _In_ const JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ @@ -10020,7 +10040,7 @@ JET_ERR JET_API JetIntersectIndexes( _In_ JET_SESID sesid, _In_reads_( cindexrange ) JET_INDEXRANGE * rgindexrange, - _In_ unsigned long cindexrange, + _In_ JET_UINT32 cindexrange, _Inout_ JET_RECORDLIST * precordlist, _In_ JET_GRBIT grbit ); @@ -10038,8 +10058,8 @@ JetComputeStats( JET_ERR JET_API JetOpenTempTable( _In_ JET_SESID sesid, - _In_reads_( ccolumn ) const JET_COLUMNDEF * prgcolumndef, - _In_ unsigned long ccolumn, + _In_reads_( ccolumn ) const JET_COLUMNDEF * prgcolumndef, + _In_ JET_UINT32 ccolumn, _In_ JET_GRBIT grbit, _Out_ JET_TABLEID * ptableid, _Out_writes_( ccolumn ) JET_COLUMNID * prgcolumnid ); @@ -10047,9 +10067,9 @@ JetOpenTempTable( JET_ERR JET_API JetOpenTempTable2( _In_ JET_SESID sesid, - _In_reads_( ccolumn ) const JET_COLUMNDEF * prgcolumndef, - _In_ unsigned long ccolumn, - _In_ unsigned long lcid, + _In_reads_( ccolumn ) const JET_COLUMNDEF * prgcolumndef, + _In_ JET_UINT32 ccolumn, + _In_ JET_UINT32 lcid, _In_ JET_GRBIT grbit, _Out_ JET_TABLEID * ptableid, _Out_writes_( ccolumn ) JET_COLUMNID * prgcolumnid ); @@ -10063,8 +10083,8 @@ JetOpenTempTable2( JET_ERR JET_API JetOpenTempTable3( _In_ JET_SESID sesid, - _In_reads_( ccolumn ) const JET_COLUMNDEF * prgcolumndef, - _In_ unsigned long ccolumn, + _In_reads_( ccolumn ) const JET_COLUMNDEF * prgcolumndef, + _In_ JET_UINT32 ccolumn, _In_opt_ JET_UNICODEINDEX * pidxunicode, _In_ JET_GRBIT grbit, _Out_ JET_TABLEID * ptableid, @@ -10313,8 +10333,8 @@ JET_ERR JET_API JetIndexRecordCount( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _Out_ unsigned long * pcrec, - _In_ unsigned long crecMax ); + _Out_ JET_UINT32 * pcrec, + _In_ JET_UINT32 crecMax ); // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) @@ -10323,20 +10343,20 @@ JET_ERR JET_API 
JetIndexRecordCount2( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _Out_ unsigned __int64 * pcrec, - _In_ unsigned __int64 crecMax ); + _Out_ JET_UINT64 * pcrec, + _In_ JET_UINT64 crecMax ); #endif // JET_VERSION >= 0x0A01 // begin_PubEsent JET_ERR JET_API JetRetrieveKey( - _In_ JET_SESID sesid, - _In_ JET_TABLEID tableid, - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pvKey, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_TABLEID tableid, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pvKey, + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10372,8 +10392,8 @@ JET_ERR JET_API JetBeginExternalBackupInstance( JET_ERR JET_API JetBeginSurrogateBackup( _In_ JET_INSTANCE instance, - _In_ unsigned long lgenFirst, - _In_ unsigned long lgenLast, + _In_ JET_UINT32 lgenFirst, + _In_ JET_UINT32 lgenLast, _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ @@ -10392,12 +10412,12 @@ JET_ERR JET_API JetBeginSurrogateBackup( JET_ERR JET_API JetGetAttachInfoA( #if ( JET_VERSION < 0x0600 ) - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pv, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pv, #else _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzDatabases, #endif - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10410,8 +10430,8 @@ JetGetAttachInfoA( JET_ERR JET_API JetGetAttachInfoW( _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzDatabases, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * 
pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10434,14 +10454,14 @@ JetGetAttachInfoW( JET_ERR JET_API JetGetAttachInfoInstanceA( - _In_ JET_INSTANCE instance, + _In_ JET_INSTANCE instance, #if ( JET_VERSION < 0x0600 ) - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pv, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pv, #else - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzDatabases, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzDatabases, #endif - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10455,8 +10475,8 @@ JET_ERR JET_API JetGetAttachInfoInstanceW( _In_ JET_INSTANCE instance, _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR szzDatabases, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10482,8 +10502,8 @@ JET_ERR JET_API JetOpenFileA( _In_ JET_PCSTR szFileName, _Out_ JET_HANDLE * phfFile, - _Out_ unsigned long * pulFileSizeLow, - _Out_ unsigned long * pulFileSizeHigh ); + _Out_ JET_UINT32 * pulFileSizeLow, + _Out_ JET_UINT32 * pulFileSizeHigh ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10497,8 +10517,8 @@ JET_ERR JET_API JetOpenFileW( _In_ JET_PCWSTR szFileName, _Out_ JET_HANDLE * phfFile, - _Out_ unsigned long * pulFileSizeLow, - _Out_ unsigned long * pulFileSizeHigh ); + _Out_ JET_UINT32 * pulFileSizeLow, + _Out_ JET_UINT32 * pulFileSizeHigh ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ 
-10524,8 +10544,8 @@ JetOpenFileInstanceA( _In_ JET_INSTANCE instance, _In_ JET_PCSTR szFileName, _Out_ JET_HANDLE * phfFile, - _Out_ unsigned long * pulFileSizeLow, - _Out_ unsigned long * pulFileSizeHigh ); + _Out_ JET_UINT32 * pulFileSizeLow, + _Out_ JET_UINT32 * pulFileSizeHigh ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10540,8 +10560,8 @@ JetOpenFileInstanceW( _In_ JET_INSTANCE instance, _In_ JET_PCWSTR szFileName, _Out_ JET_HANDLE * phfFile, - _Out_ unsigned long * pulFileSizeLow, - _Out_ unsigned long * pulFileSizeHigh ); + _Out_ JET_UINT32 * pulFileSizeLow, + _Out_ JET_UINT32 * pulFileSizeHigh ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10568,11 +10588,11 @@ JetOpenFileSectionInstanceA( _In_ JET_INSTANCE instance, _In_ JET_PSTR szFile, _Out_ JET_HANDLE * phFile, - _In_ long iSection, - _In_ long cSections, - _In_ unsigned __int64 ibRead, - _Out_ unsigned long * pulSectionSizeLow, - _Out_ long * plSectionSizeHigh ); + _In_ JET_INT32 iSection, + _In_ JET_INT32 cSections, + _In_ JET_UINT64 ibRead, + _Out_ JET_UINT32 * pulSectionSizeLow, + _Out_ JET_INT32 * plSectionSizeHigh ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -10587,11 +10607,11 @@ JetOpenFileSectionInstanceW( _In_ JET_INSTANCE instance, _In_ JET_PWSTR szFile, _Out_ JET_HANDLE * phFile, - _In_ long iSection, - _In_ long cSections, - _In_ unsigned __int64 ibRead, - _Out_ unsigned long * pulSectionSizeLow, - _Out_ long * plSectionSizeHigh ); + _In_ JET_INT32 iSection, + _In_ JET_INT32 cSections, + _In_ JET_UINT64 ibRead, + _Out_ JET_UINT32 * pulSectionSizeLow, + _Out_ JET_INT32 * plSectionSizeHigh ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -10611,10 +10631,10 @@ JetOpenFileSectionInstanceW( JET_ERR JET_API JetReadFile( - _In_ JET_HANDLE hfFile, - _Out_writes_bytes_to_( cb, 
*pcbActual ) void * pv, - _In_ unsigned long cb, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_HANDLE hfFile, + _Out_writes_bytes_to_( cb, *pcbActual ) JET_PVOID pv, + _In_ JET_UINT32 cb, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10626,11 +10646,11 @@ JetReadFile( JET_ERR JET_API JetReadFileInstance( - _In_ JET_INSTANCE instance, - _In_ JET_HANDLE hfFile, - _Out_writes_bytes_to_( cb, *pcbActual ) void * pv, - _In_ unsigned long cb, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_INSTANCE instance, + _In_ JET_HANDLE hfFile, + _Out_writes_bytes_to_( cb, *pcbActual ) JET_PVOID pv, + _In_ JET_UINT32 cb, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10672,12 +10692,12 @@ JetCloseFileInstance( JET_ERR JET_API JetGetLogInfoA( #if ( JET_VERSION < 0x0600 ) - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pv, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pv, #else - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, #endif - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10690,8 +10710,8 @@ JetGetLogInfoA( JET_ERR JET_API JetGetLogInfoW( _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR szzLogs, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10714,14 +10734,14 @@ JetGetLogInfoW( JET_ERR JET_API JetGetLogInfoInstanceA( - _In_ JET_INSTANCE instance, + _In_ 
JET_INSTANCE instance, #if ( JET_VERSION < 0x0600 ) - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pv, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pv, #else - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, #endif - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10733,10 +10753,10 @@ JetGetLogInfoInstanceA( JET_ERR JET_API JetGetLogInfoInstanceW( - _In_ JET_INSTANCE instance, - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzLogs, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_INSTANCE instance, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzLogs, + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10754,18 +10774,18 @@ JetGetLogInfoInstanceW( #define JET_BASE_NAME_LENGTH 3 typedef struct { - unsigned long cbSize; - unsigned long ulGenLow; - unsigned long ulGenHigh; - char szBaseName[ JET_BASE_NAME_LENGTH + 1 ]; + JET_UINT32 cbSize; + JET_UINT32 ulGenLow; + JET_UINT32 ulGenHigh; + JET_CHAR szBaseName[ JET_BASE_NAME_LENGTH + 1 ]; } JET_LOGINFO_A; typedef struct { - unsigned long cbSize; - unsigned long ulGenLow; - unsigned long ulGenHigh; - WCHAR szBaseName[ JET_BASE_NAME_LENGTH + 1 ]; + JET_UINT32 cbSize; + JET_UINT32 ulGenLow; + JET_UINT32 ulGenHigh; + JET_WCHAR szBaseName[ JET_BASE_NAME_LENGTH + 1 ]; } JET_LOGINFO_W; #ifdef JET_UNICODE @@ -10783,15 +10803,15 @@ typedef struct JET_ERR JET_API JetGetLogInfoInstance2A( - _In_ JET_INSTANCE instance, + _In_ JET_INSTANCE instance, #if ( JET_VERSION < 0x0600 ) - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pv, + 
_Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pv, #else - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, #endif - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual, - _Inout_opt_ JET_LOGINFO_A * pLogInfo ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual, + _Inout_opt_ JET_LOGINFO_A * pLogInfo ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10803,11 +10823,11 @@ JetGetLogInfoInstance2A( JET_ERR JET_API JetGetLogInfoInstance2W( - _In_ JET_INSTANCE instance, - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzLogs, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual, - _Inout_opt_ JET_LOGINFO_W * pLogInfo ); + _In_ JET_INSTANCE instance, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzLogs, + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual, + _Inout_opt_ JET_LOGINFO_W * pLogInfo ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10829,14 +10849,14 @@ JetGetLogInfoInstance2W( JET_ERR JET_API JetGetTruncateLogInfoInstanceA( - _In_ JET_INSTANCE instance, + _In_ JET_INSTANCE instance, #if ( JET_VERSION < 0x0600 ) - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) void * pv, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PVOID pv, #else - _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PSTR szzLogs, #endif - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10848,10 +10868,10 @@ JetGetTruncateLogInfoInstanceA( JET_ERR JET_API JetGetTruncateLogInfoInstanceW( - _In_ JET_INSTANCE instance, - 
_Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzLogs, - _In_ unsigned long cbMax, - _Out_opt_ unsigned long * pcbActual ); + _In_ JET_INSTANCE instance, + _Out_writes_bytes_to_opt_( cbMax, *pcbActual ) JET_PWSTR wszzLogs, + _In_ JET_UINT32 cbMax, + _Out_opt_ JET_UINT32 * pcbActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10869,7 +10889,7 @@ JetGetTruncateLogInfoInstanceW( #pragma region Desktop Family or Esent Package #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) -JET_ERR JET_API JetTruncateLog( void ); +JET_ERR JET_API JetTruncateLog( JET_VOID ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10891,7 +10911,7 @@ JetTruncateLogInstance( #pragma region Desktop Family or Esent Package #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) -JET_ERR JET_API JetEndExternalBackup( void ); +JET_ERR JET_API JetEndExternalBackup( JET_VOID ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -10944,10 +10964,10 @@ JetExternalRestoreA( _In_ JET_PSTR szCheckpointFilePath, _In_ JET_PSTR szLogPath, _In_reads_opt_( crstfilemap ) JET_RSTMAP_A * rgrstmap, - _In_ long crstfilemap, + _In_ JET_INT32 crstfilemap, _In_ JET_PSTR szBackupLogPath, - _In_ long genLow, - _In_ long genHigh, + _In_ JET_INT32 genLow, + _In_ JET_INT32 genHigh, _In_ JET_PFNSTATUS pfn ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -10963,10 +10983,10 @@ JetExternalRestoreW( _In_ JET_PWSTR szCheckpointFilePath, _In_ JET_PWSTR szLogPath, _In_reads_opt_( crstfilemap ) JET_RSTMAP_W * rgrstmap, - _In_ long crstfilemap, + _In_ JET_INT32 crstfilemap, _In_ JET_PWSTR szBackupLogPath, - _In_ long genLow, - _In_ long genHigh, + _In_ JET_INT32 genLow, + _In_ JET_INT32 genHigh, _In_ JET_PFNSTATUS 
pfn ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ @@ -10993,7 +11013,7 @@ JetExternalRestore2A( _In_ JET_PSTR szCheckpointFilePath, _In_ JET_PSTR szLogPath, _In_reads_opt_( crstfilemap ) JET_RSTMAP_A * rgrstmap, - _In_ long crstfilemap, + _In_ JET_INT32 crstfilemap, _In_ JET_PSTR szBackupLogPath, _Inout_ JET_LOGINFO_A * pLogInfo, _In_opt_ JET_PSTR szTargetInstanceName, @@ -11014,7 +11034,7 @@ JetExternalRestore2W( _In_ JET_PWSTR szCheckpointFilePath, _In_ JET_PWSTR szLogPath, _In_reads_opt_( crstfilemap ) JET_RSTMAP_W * rgrstmap, - _In_ long crstfilemap, + _In_ JET_INT32 crstfilemap, _In_ JET_PWSTR szBackupLogPath, _Inout_ JET_LOGINFO_W * pLogInfo, _In_opt_ JET_PWSTR szTargetInstanceName, @@ -11092,7 +11112,7 @@ JetRegisterCallback( _In_ JET_TABLEID tableid, _In_ JET_CBTYP cbtyp, _In_ JET_CALLBACK pCallback, - _In_opt_ void * pvContext, + _In_opt_ JET_PVOID pvContext, _In_ JET_HANDLE * phCallbackId ); JET_ERR JET_API @@ -11108,23 +11128,23 @@ JetUnregisterCallback( typedef struct _JET_INSTANCE_INFO_A { JET_INSTANCE hInstanceId; - char * szInstanceName; + JET_PSTR szInstanceName; JET_API_PTR cDatabases; - char ** szDatabaseFileName; - char ** szDatabaseDisplayName; - char ** szDatabaseSLVFileName_Obsolete; + JET_PSTR * szDatabaseFileName; + JET_PSTR * szDatabaseDisplayName; + JET_PSTR * szDatabaseSLVFileName_Obsolete; } JET_INSTANCE_INFO_A; typedef struct _JET_INSTANCE_INFO_W { JET_INSTANCE hInstanceId; - WCHAR * szInstanceName; + JET_PWSTR szInstanceName; JET_API_PTR cDatabases; - WCHAR ** szDatabaseFileName; - WCHAR ** szDatabaseDisplayName; - WCHAR ** szDatabaseSLVFileName_Obsolete; + JET_PWSTR * szDatabaseFileName; + JET_PWSTR * szDatabaseDisplayName; + JET_PWSTR * szDatabaseSLVFileName_Obsolete; } JET_INSTANCE_INFO_W; #ifdef JET_UNICODE @@ -11142,8 +11162,8 @@ typedef struct _JET_INSTANCE_INFO_W JET_ERR JET_API JetGetInstanceInfoA( - _Out_ unsigned long * pcInstanceInfo, - _Outptr_result_buffer_( *pcInstanceInfo 
) JET_INSTANCE_INFO_A ** paInstanceInfo ); + _Out_ JET_UINT32 * pcInstanceInfo, + _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_A ** paInstanceInfo ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -11155,8 +11175,8 @@ JetGetInstanceInfoA( JET_ERR JET_API JetGetInstanceInfoW( - _Out_ unsigned long * pcInstanceInfo, - _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_W ** paInstanceInfo ); + _Out_ JET_UINT32 * pcInstanceInfo, + _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_W ** paInstanceInfo ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -11173,7 +11193,7 @@ JetGetInstanceInfoW( JET_ERR JET_API JetFreeBuffer( - _Pre_notnull_ char * pbBuf ); + _Pre_notnull_ JET_CHAR * pbBuf ); JET_ERR JET_API JetSetLS( @@ -11214,7 +11234,7 @@ JetTracing( #pragma region Desktop Family or Esent Package #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) -typedef JET_API_PTR JET_OSSNAPID; /* Snapshot Session Identifier */ +typedef JET_API_PTR JET_OSSNAPID; // Snapshot Session Identifier JET_ERR JET_API JetOSSnapshotPrepare( @@ -11235,19 +11255,19 @@ JetOSSnapshotPrepareInstance( JET_ERR JET_API JetOSSnapshotFreezeA( - _In_ const JET_OSSNAPID snapId, - _Out_ unsigned long * pcInstanceInfo, - _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_A ** paInstanceInfo, - _In_ const JET_GRBIT grbit ); + _In_ const JET_OSSNAPID snapId, + _Out_ JET_UINT32 * pcInstanceInfo, + _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_A ** paInstanceInfo, + _In_ const JET_GRBIT grbit ); #if ( JET_VERSION >= 0x0600 ) JET_ERR JET_API JetOSSnapshotFreezeW( - _In_ const JET_OSSNAPID snapId, - _Out_ unsigned long * pcInstanceInfo, - _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_W ** paInstanceInfo, - _In_ const JET_GRBIT grbit ); + _In_ const JET_OSSNAPID snapId, + _Out_ 
JET_UINT32 * pcInstanceInfo, + _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_W ** paInstanceInfo, + _In_ const JET_GRBIT grbit ); #ifdef JET_UNICODE #define JetOSSnapshotFreeze JetOSSnapshotFreezeW @@ -11303,10 +11323,10 @@ JetOSSnapshotTruncateLogInstance( JET_ERR JET_API JetOSSnapshotGetFreezeInfoA( - _In_ const JET_OSSNAPID snapId, - _Out_ unsigned long * pcInstanceInfo, - _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_A ** paInstanceInfo, - _In_ const JET_GRBIT grbit ); + _In_ const JET_OSSNAPID snapId, + _Out_ JET_UINT32 * pcInstanceInfo, + _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_A ** paInstanceInfo, + _In_ const JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -11316,10 +11336,10 @@ JetOSSnapshotGetFreezeInfoA( JET_ERR JET_API JetOSSnapshotGetFreezeInfoW( - _In_ const JET_OSSNAPID snapId, - _Out_ unsigned long * pcInstanceInfo, - _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_W ** paInstanceInfo, - _In_ const JET_GRBIT grbit ); + _In_ const JET_OSSNAPID snapId, + _Out_ JET_UINT32 * pcInstanceInfo, + _Outptr_result_buffer_( *pcInstanceInfo ) JET_INSTANCE_INFO_W ** paInstanceInfo, + _In_ const JET_GRBIT grbit ); #ifdef JET_UNICODE #define JetOSSnapshotGetFreezeInfo JetOSSnapshotGetFreezeInfoW @@ -11345,12 +11365,12 @@ JetOSSnapshotEnd( JET_ERR JET_API JetGetPageInfo( - _In_reads_bytes_( cbData ) void * const pvPages, // raw page data - _In_ unsigned long cbData, // size of raw page data - _Inout_updates_bytes_( cbPageInfo ) JET_PAGEINFO * rgPageInfo, // array of pageinfo structures - _In_ unsigned long cbPageInfo, // length of buffer for pageinfo array - _In_ JET_GRBIT grbit, // options - _In_ unsigned long ulInfoLevel ); // info level + _In_reads_bytes_( cbData ) const JET_PVOID pvPages, // raw page data + _In_ JET_UINT32 cbData, // size of raw page data + _Inout_updates_bytes_( cbPageInfo ) JET_PAGEINFO * rgPageInfo, 
// array of pageinfo structures + _In_ JET_UINT32 cbPageInfo, // length of buffer for pageinfo array + _In_ JET_GRBIT grbit, // options + _In_ JET_UINT32 ulInfoLevel ); // info level #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -11364,23 +11384,23 @@ JetGetPageInfo( JET_ERR JET_API JetGetPageInfo2( - _In_reads_bytes_( cbData ) void * const pvPages, // raw page data - _In_ unsigned long cbData, // size of raw page data - _Inout_updates_bytes_( cbPageInfo ) void * const rgPageInfo, // array of pageinfo structures - _In_ unsigned long cbPageInfo, // length of buffer for pageinfo array - _In_ JET_GRBIT grbit, // options - _In_ unsigned long ulInfoLevel ); // info level + _In_reads_bytes_( cbData ) const JET_PVOID pvPages, // raw page data + _In_ JET_UINT32 cbData, // size of raw page data + _Inout_updates_bytes_( cbPageInfo ) const JET_PVOID rgPageInfo, // array of pageinfo structures + _In_ JET_UINT32 cbPageInfo, // length of buffer for pageinfo array + _In_ JET_GRBIT grbit, // options + _In_ JET_UINT32 ulInfoLevel ); // info level JET_ERR JET_API JetGetDatabasePages( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ unsigned long pgnoStart, - _In_ unsigned long cpg, - _Out_writes_bytes_to_( cb, *pcbActual ) void * pv, - _In_ unsigned long cb, - _Out_ unsigned long * pcbActual, - _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_UINT32 pgnoStart, + _In_ JET_UINT32 cpg, + _Out_writes_bytes_to_( cb, *pcbActual ) JET_PVOID pv, + _In_ JET_UINT32 cb, + _Out_ JET_UINT32 * pcbActual, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -11396,14 +11416,14 @@ JetGetDatabasePages( JET_ERR JET_API JetOnlinePatchDatabasePage( - _In_ JET_SESID sesid, - _In_ JET_DBID dbid, - _In_ unsigned long pgno, - _In_reads_bytes_(cbToken) const void * pvToken, - _In_ unsigned long cbToken, - _In_reads_bytes_(cbData) const void * pvData, - _In_ unsigned long cbData, 
- _In_ JET_GRBIT grbit ); + _In_ JET_SESID sesid, + _In_ JET_DBID dbid, + _In_ JET_UINT32 pgno, + _In_reads_bytes_(cbToken) JET_PCVOID pvToken, + _In_ JET_UINT32 cbToken, + _In_reads_bytes_(cbData) JET_PCVOID pvData, + _In_ JET_UINT32 cbData, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -11441,14 +11461,14 @@ JET_ERR JET_API JetBeginDatabaseIncrementalReseedA( _In_ JET_INSTANCE instance, _In_ JET_PCSTR szDatabase, - _In_ unsigned long genFirstDivergedLog, + _In_ JET_UINT32 genFirstDivergedLog, _In_ JET_GRBIT grbit ); JET_ERR JET_API JetBeginDatabaseIncrementalReseedW( _In_ JET_INSTANCE instance, _In_ JET_PCWSTR szDatabase, - _In_ unsigned long genFirstDivergedLog, + _In_ JET_UINT32 genFirstDivergedLog, _In_ JET_GRBIT grbit ); #ifdef JET_UNICODE @@ -11461,18 +11481,18 @@ JET_ERR JET_API JetEndDatabaseIncrementalReseedA( _In_ JET_INSTANCE instance, _In_ JET_PCSTR szDatabase, - _In_ unsigned long genMinRequired, - _In_ unsigned long genFirstDivergedLog, - _In_ unsigned long genMaxRequired, + _In_ JET_UINT32 genMinRequired, + _In_ JET_UINT32 genFirstDivergedLog, + _In_ JET_UINT32 genMaxRequired, _In_ JET_GRBIT grbit ); JET_ERR JET_API JetEndDatabaseIncrementalReseedW( _In_ JET_INSTANCE instance, _In_ JET_PCWSTR szDatabase, - _In_ unsigned long genMinRequired, - _In_ unsigned long genFirstDivergedLog, - _In_ unsigned long genMaxRequired, + _In_ JET_UINT32 genMinRequired, + _In_ JET_UINT32 genFirstDivergedLog, + _In_ JET_UINT32 genMaxRequired, _In_ JET_GRBIT grbit ); #ifdef JET_UNICODE @@ -11483,23 +11503,23 @@ JetEndDatabaseIncrementalReseedW( JET_ERR JET_API JetPatchDatabasePagesA( - _In_ JET_INSTANCE instance, - _In_ JET_PCSTR szDatabase, - _In_ unsigned long pgnoStart, - _In_ unsigned long cpg, - _In_reads_bytes_( cb ) const void * pv, - _In_ unsigned long cb, - _In_ JET_GRBIT grbit ); + _In_ JET_INSTANCE instance, + _In_ JET_PCSTR szDatabase, + _In_ JET_UINT32 pgnoStart, + _In_ JET_UINT32 cpg, + 
_In_reads_bytes_( cb ) JET_PCVOID pv, + _In_ JET_UINT32 cb, + _In_ JET_GRBIT grbit ); JET_ERR JET_API JetPatchDatabasePagesW( - _In_ JET_INSTANCE instance, - _In_ JET_PCWSTR szDatabase, - _In_ unsigned long pgnoStart, - _In_ unsigned long cpg, - _In_reads_bytes_( cb ) const void * pv, - _In_ unsigned long cb, - _In_ JET_GRBIT grbit ); + _In_ JET_INSTANCE instance, + _In_ JET_PCWSTR szDatabase, + _In_ JET_UINT32 pgnoStart, + _In_ JET_UINT32 cpg, + _In_reads_bytes_( cb ) JET_PCVOID pv, + _In_ JET_UINT32 cb, + _In_ JET_GRBIT grbit ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ #pragma endregion @@ -11517,17 +11537,17 @@ JetPatchDatabasePagesW( JET_ERR JET_API JetGetRBSFileInfoA( - _In_ JET_PCSTR szRBSFileName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_PCSTR szRBSFileName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); JET_ERR JET_API JetGetRBSFileInfoW( - _In_ JET_PCWSTR szRBSFileName, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel ); + _In_ JET_PCWSTR szRBSFileName, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel ); #ifdef JET_UNICODE #define JetGetRBSFileInfo JetGetRBSFileInfoW @@ -11539,15 +11559,15 @@ JET_ERR JET_API JetRBSPrepareRevert( _In_ JET_INSTANCE instance, _In_ JET_LOGTIME jltRevertExpected, - _In_ long cpgCache, + _In_ JET_INT32 cpgCache, _In_ JET_GRBIT grbit, - _Out_ JET_LOGTIME* pjltRevertActual ); + _Out_ JET_LOGTIME * pjltRevertActual ); JET_ERR JET_API JetRBSExecuteRevert( - _In_ JET_INSTANCE instance, - _In_ JET_GRBIT grbit, - _Out_ JET_RBSREVERTINFOMISC* prbsrevertinfomisc ); + _In_ JET_INSTANCE instance, + _In_ JET_GRBIT grbit, + _Out_ JET_RBSREVERTINFOMISC * prbsrevertinfomisc ); JET_ERR JET_API JetRBSCancelRevert( @@ -11670,8 +11690,8 @@ enum typedef struct 
tagJET_TESTHOOKUNITTEST2 { - unsigned long cbStruct; // size of this structure - char * szTestName; // test name / test wildcard + JET_UINT32 cbStruct; // size of this structure + JET_PSTR szTestName; // test name / test wildcard JET_DBID dbidTestOn; // database to perform the internal tests against } JET_TESTHOOKUNITTEST2; @@ -11703,11 +11723,11 @@ typedef enum // pv struct for opTestHookTestInjection typedef struct tagJET_TESTHOOKTESTINJECTION { - unsigned long cbStruct; - unsigned long ulID; + JET_UINT32 cbStruct; + JET_UINT32 ulID; JET_API_PTR pv; JET_TESTINJECTIONTYPE type; - unsigned long ulProbability; + JET_UINT32 ulProbability; JET_GRBIT grbit; } JET_TESTHOOKTESTINJECTION; @@ -11715,40 +11735,40 @@ typedef struct tagJET_TESTHOOKTESTINJECTION // and opTestHookHookGlobalMemoryStatus typedef struct tagJET_TESTHOOKAPIHOOKING { - unsigned long cbStruct; - const void * pfnOld; - const void * pfnNew; + JET_UINT32 cbStruct; + JET_PCVOID pfnOld; + JET_PCVOID pfnNew; } JET_TESTHOOKAPIHOOKING; // pv struct for opTestHookTraceTestMarker typedef struct tagJET_TESTHOOKTRACETESTMARKER { - unsigned long cbStruct; - const char * szAnnotation; - unsigned __int64 qwMarkerID; + JET_UINT32 cbStruct; + JET_PCSTR szAnnotation; + JET_UINT64 qwMarkerID; } JET_TESTHOOKTRACETESTMARKER; // pv struct for opTestHookSetEngineTickTime typedef struct tagJET_TESTHOOKTIMEINJECTION { - unsigned long cbStruct; - unsigned long tickNow; - unsigned long eTimeInjWrapMode; - unsigned long dtickTimeInjWrapOffset; - unsigned long dtickTimeInjAccelerant; + JET_UINT32 cbStruct; + JET_UINT32 tickNow; + JET_UINT32 eTimeInjWrapMode; + JET_UINT32 dtickTimeInjWrapOffset; + JET_UINT32 dtickTimeInjAccelerant; } JET_TESTHOOKTIMEINJECTION; // pv struct for opTestHookCacheQuery typedef struct tagJET_TESTHOOKCACHEQUERY { - unsigned long cbStruct; + JET_UINT32 cbStruct; // in args - long cCacheQuery; - char ** rgszCacheQuery; + JET_INT32 cCacheQuery; + JET_PSTR * rgszCacheQuery; // out arg - void * pvOut; + 
JET_PVOID pvOut; } JET_TESTHOOKCACHEQUERY; #define JET_bitTestHookEvictDataByPgno 0x00000001 // Specifies that we are evicting data from the database cache, specified by pgno. @@ -11756,7 +11776,7 @@ typedef struct tagJET_TESTHOOKCACHEQUERY // pv struct for opTestHookEvictCache typedef struct tagJET_TESTHOOKEVICTCACHE { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_API_PTR ulTargetContext; // For ..EvictDataByPgno = JET_DBID JET_API_PTR ulTargetData; // For ..EvictDataByPgno = PageNumber/pgno JET_GRBIT grbit; @@ -11793,7 +11813,7 @@ typedef struct tagJET_TESTHOOKEVICTCACHE typedef struct tagJET_TESTHOOKCORRUPT { - unsigned long cbStruct; + JET_UINT32 cbStruct; JET_GRBIT grbit; union @@ -11802,18 +11822,18 @@ typedef struct tagJET_TESTHOOKCORRUPT #include struct // CorruptDatabaseFile { - JET_PWSTR wszDatabaseFilePath; // Name of the database file - __int64 pgnoTarget; // Page number target, or JET_pgnoTestHookCorruptRandom - __int64 iSubTarget; // Depends upon the JET_bitTestHookCorruptPage* type. + JET_PWSTR wszDatabaseFilePath; // Name of the database file + JET_INT64 pgnoTarget; // Page number target, or JET_pgnoTestHookCorruptRandom + JET_INT64 iSubTarget; // Depends upon the JET_bitTestHookCorruptPage* type. } CorruptDatabaseFile; #include struct // CorruptDatabasePageImage { - JET_API_PTR pbPageImageTarget; // Pointer to the page image to corrupt - unsigned long cbPageImage; - __int64 pgnoTarget; // Page number target (note: this may not seem like it should be required, but it is b/c 4 KB pages xor this into the checksum) - __int64 iSubTarget; // Depends upon the JET_bitTestHookCorruptPage* type. + JET_API_PTR pbPageImageTarget; // Pointer to the page image to corrupt + JET_UINT32 cbPageImage; + JET_INT64 pgnoTarget; // Page number target (note: this may not seem like it should be required, but it is b/c 4 KB pages xor this into the checksum) + JET_INT64 iSubTarget; // Depends upon the JET_bitTestHookCorruptPage* type. 
} CorruptDatabasePageImage; }; @@ -11832,9 +11852,9 @@ typedef struct tagJET_TESTHOOKCORRUPT typedef struct tagJET_TESTHOOKALTERDBFILEHDR { JET_PWSTR szDatabase; - unsigned long ibField; - unsigned long cbField; - char * pbField; + JET_UINT32 ibField; + JET_UINT32 cbField; + JET_BYTE * pbField; JET_GRBIT grbit; } JET_TESTHOOKALTERDBFILEHDR; @@ -11844,14 +11864,14 @@ typedef struct tagJET_TESTHOOKALTERDBFILEHDR JET_ERR JET_API JetTestHook( _In_ const TESTHOOK_OP opcode, - _Inout_opt_ void * const pv ); + _Inout_opt_ const JET_PVOID pv ); JET_ERR JET_API JetConsumeLogData( _In_ JET_INSTANCE instance, _In_ JET_EMITDATACTX * pEmitLogDataCtx, - _In_ void * pvLogData, - _In_ unsigned long cbLogData, + _In_ JET_PVOID pvLogData, + _In_ JET_UINT32 cbLogData, _In_ JET_GRBIT grbits ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */ @@ -11867,11 +11887,11 @@ JET_ERR JET_API JetConsumeLogData( #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) JET_ERR JET_API JetGetErrorInfoW( - _In_opt_ void * pvContext, - _Out_writes_bytes_( cbMax ) void * pvResult, - _In_ unsigned long cbMax, - _In_ unsigned long InfoLevel, - _In_ JET_GRBIT grbit ); + _In_opt_ JET_PVOID pvContext, + _Out_writes_bytes_( cbMax ) JET_PVOID pvResult, + _In_ JET_UINT32 cbMax, + _In_ JET_UINT32 InfoLevel, + _In_ JET_GRBIT grbit ); #ifdef JET_UNICODE #define JetGetErrorInfo JetGetErrorInfoW @@ -11882,17 +11902,17 @@ JET_ERR JET_API JetGetErrorInfoW( JET_ERR JET_API JetSetSessionParameter( _In_opt_ JET_SESID sesid, - _In_ unsigned long sesparamid, - _In_reads_bytes_opt_( cbParam ) void * pvParam, - _In_ unsigned long cbParam ); + _In_ JET_UINT32 sesparamid, + _In_reads_bytes_opt_( cbParam ) JET_PVOID pvParam, + _In_ JET_UINT32 cbParam ); JET_ERR JET_API JetGetSessionParameter( _In_opt_ JET_SESID sesid, - _In_ unsigned long sesparamid, - _Out_cap_post_count_(cbParamMax, *pcbParamActual) void * pvParam, - _In_ unsigned long cbParamMax, - _Out_opt_ unsigned long * 
pcbParamActual ); + _In_ JET_UINT32 sesparamid, + _Out_cap_post_count_(cbParamMax, *pcbParamActual) JET_PVOID pvParam, + _In_ JET_UINT32 cbParamMax, + _Out_opt_ JET_UINT32 * pcbParamActual ); #endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_PKG_ESENT) */ #pragma endregion @@ -11906,7 +11926,7 @@ JET_ERR JET_API JetPrereadTablesW( _In_ JET_SESID sesid, _In_ JET_DBID dbid, _In_reads_( cwszTables ) JET_PCWSTR * rgwszTables, - _In_ long cwszTables, + _In_ JET_INT32 cwszTables, _In_ JET_GRBIT grbit ); #ifdef JET_UNICODE @@ -11931,35 +11951,35 @@ JetPrereadIndexRange( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, _In_ const JET_INDEX_RANGE * const pIndexRange, - _In_ const unsigned long cPageCacheMin, - _In_ const unsigned long cPageCacheMax, + _In_ const JET_UINT32 cPageCacheMin, + _In_ const JET_UINT32 cPageCacheMax, _In_ JET_GRBIT grbit, - _Out_opt_ unsigned long * const pcPageCacheActual ); + _Out_opt_ JET_UINT32 * const pcPageCacheActual ); #endif // JET_VERSION >= 0x0A00 #if ( JET_VERSION >= 0x0A01 ) JET_ERR JET_API JetRetrieveColumnByReference( - _In_ const JET_SESID sesid, - _In_ const JET_TABLEID tableid, - _In_reads_bytes_( cbReference ) const void * const pvReference, - _In_ const unsigned long cbReference, - _In_ const unsigned long ibData, - _Out_writes_bytes_to_opt_( cbData, min( cbData, *pcbActual ) ) void * const pvData, - _In_ const unsigned long cbData, - _Out_opt_ unsigned long * const pcbActual, - _In_ const JET_GRBIT grbit ); + _In_ const JET_SESID sesid, + _In_ const JET_TABLEID tableid, + _In_reads_bytes_( cbReference ) const JET_PCVOID pvReference, + _In_ const JET_UINT32 cbReference, + _In_ const JET_UINT32 ibData, + _Out_writes_bytes_to_opt_( cbData, min( cbData, *pcbActual ) ) const JET_PVOID pvData, + _In_ const JET_UINT32 cbData, + _Out_opt_ JET_UINT32 * const pcbActual, + _In_ const JET_GRBIT grbit ); JET_ERR JET_API JetPrereadColumnsByReference( _In_ const JET_SESID sesid, _In_ const JET_TABLEID tableid, - 
_In_reads_( cReferences ) const void * const * const rgpvReferences, - _In_reads_( cReferences ) const unsigned long * const rgcbReferences, - _In_ const unsigned long cReferences, - _In_ const unsigned long cPageCacheMin, - _In_ const unsigned long cPageCacheMax, - _Out_opt_ unsigned long * const pcReferencesPreread, + _In_reads_( cReferences ) const JET_PCVOID * const rgpvReferences, + _In_reads_( cReferences ) const JET_UINT32 * const rgcbReferences, + _In_ const JET_UINT32 cReferences, + _In_ const JET_UINT32 cPageCacheMin, + _In_ const JET_UINT32 cPageCacheMax, + _Out_opt_ JET_UINT32 * const pcReferencesPreread, _In_ const JET_GRBIT grbit ); #endif // JET_VERSION >= 0x0A01 @@ -11969,21 +11989,21 @@ JET_ERR JET_API JetPrereadColumnsByReference( JET_ERR JET_API JetStreamRecords( _In_ JET_SESID sesid, _In_ JET_TABLEID tableid, - _In_ const unsigned long ccolumnid, + _In_ const JET_UINT32 ccolumnid, _In_reads_opt_( ccolumnid ) const JET_COLUMNID * const rgcolumnid, - _Out_writes_bytes_to_opt_( cbData, *pcbActual ) void * const pvData, - _In_ const unsigned long cbData, - _Out_opt_ unsigned long * const pcbActual, + _Out_writes_bytes_to_opt_( cbData, *pcbActual ) const JET_PVOID pvData, + _In_ const JET_UINT32 cbData, + _Out_opt_ JET_UINT32 * const pcbActual, _In_ const JET_GRBIT grbit ); JET_ERR JET_API JetRetrieveColumnFromRecordStream( - _Inout_updates_bytes_( cbData ) void * const pvData, - _In_ const unsigned long cbData, - _Out_ unsigned long * const piRecord, + _Inout_updates_bytes_( cbData ) const JET_PVOID pvData, + _In_ const JET_UINT32 cbData, + _Out_ JET_UINT32 * const piRecord, _Out_ JET_COLUMNID * const pcolumnid, - _Out_ unsigned long * const pitagSequence, - _Out_ unsigned long * const pibValue, - _Out_ unsigned long * const pcbValue ); + _Out_ JET_UINT32 * const pitagSequence, + _Out_ JET_UINT32 * const pibValue, + _Out_ JET_UINT32 * const pcbValue ); #endif // JET_VERSION >= 0x0A01 From eb45d9a9371ca7ab903ccf130f973eee413550cd Mon Sep 17 00:00:00 
2001 From: Alexandre Costa Date: Thu, 8 Sep 2022 05:10:46 +0000 Subject: [PATCH 014/102] Remove bad assert in ErrCATIBuildFIELDArray(), fix bad printf format string in CPAGE::ErrCheckPage() [Substrate:298df9ad4f45dfaf930c821c680a9901d2d3e8a9] --- dev/ese/src/ese/cat.cxx | 1 - dev/ese/src/ese/cpage.cxx | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index e8204502..9702d0f2 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -6403,7 +6403,6 @@ LOCAL ERR ErrCATIBuildFIELDArray( { Assert( JET_errRecordDeleted != err ); Assert( locOnCurBM == pfucbCatalog->locLogical ); - Assert( Pcsr( pfucbCatalog )->FLatched() ); if ( JET_errNoCurrentRecord != err ) return err; } diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index 3d35281e..49f4b469 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -4932,15 +4932,15 @@ ERR CPAGE::ErrCheckPage( if ( errGetLine < JET_errSuccess || !FOnData( line.pv, line.cb ) ) { // catch all - CHAR szGetLineErr[40]; - OSStrCbFormatA( szGetLineErr, sizeof( szGetLineErr ), "GetLineFailed:%d\n", errGetLine ); + WCHAR wszGetLineErr[40]; + OSStrCbFormatW( wszGetLineErr, sizeof( wszGetLineErr ), L"GetLineFailed:%d\n", errGetLine ); MakeCorruptionDetailsSz( L"GetLineFailed:%d\n", errGetLine ); MakeCorruptionDetailsSz( L"UNCAUGHT: TAG %d ErrGetPtr() failed or got line off page (ib=%d, cb=%d, err=%d,f=%d).", itag, ib, cb, errGetLine, FOnData( line.pv, line.cb ) ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); // there should not be too many errors coming from ErrGetLine() that we can't embed the err in the corruption type. 
PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "GetLineFailed:%d\n", errGetLine ); #ifdef DEBUG - Error( ErrCaptureCorruptedPageInfoSz( mode, szGetLineErr, wszCorruptionDetails, fLogEvent ) ); + Error( ErrCaptureCorruptedPageInfoSz( mode, wszGetLineErr, wszCorruptionDetails, fLogEvent ) ); #endif } @@ -5040,12 +5040,12 @@ ERR CPAGE::ErrCheckPage( if ( errGetKdf < JET_errSuccess ) { - CHAR szGetKdfErr [40]; + WCHAR wszGetKdfErr [40]; // there should not be too many errors coming from ErrNDIGetKeydataflags() that we can't embed the err in the corruption type. - OSStrCbFormatA( szGetKdfErr, sizeof( szGetKdfErr ), "NdiGetKdfFailed:%d", errGetKdf ); - MakeCorruptionDetailsSz( L"TAG %d failed to load NDIGetKeydataFlags with %d\r\n", errGetKdf ); + OSStrCbFormatW( wszGetKdfErr, sizeof( wszGetKdfErr ), L"NdiGetKdfFailed:%d", errGetKdf ); + MakeCorruptionDetailsSz( L"TAG %d failed to load NDIGetKeydataFlags with %d\r\n", itag, errGetKdf ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); - Error( ErrCaptureCorruptedPageInfoSz( mode, L"TagDataTooLarge", wszCorruptionDetails, fLogEvent ) ); + Error( ErrCaptureCorruptedPageInfoSz( mode, wszGetKdfErr, wszCorruptionDetails, fLogEvent ) ); } if ( grbitExtensiveCheck & CheckLinesInOrder ) From 8261eadd4577d62bc676bf6e3094437f15200e8b Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 8 Sep 2022 15:10:50 +0000 Subject: [PATCH 015/102] ESE Block Cache: add more bits to the Presence Filter This increases the size of the presence filter from 8 bits to 10 bits. The code is written so that it is easy to change it again in the future as long as each bucket is byte aligned. 
[Substrate:294faf2955f563d2a84f6b3b8f8433344b5358a8] --- .../src/os/blockcache/_hashedlrukcache.hxx | 78 +++++++++++++++---- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index e236a8bc..7641ccc9 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -4297,12 +4297,12 @@ class THashedLRUKCache CBucket() { - C_ASSERT( sizeof( *this ) == sizeof( m_rgbFingerprint ) ); - memset( this, 0, sizeof( *this ) ); + memset( this, 0, sizeof( CBucket ) ); } - static size_t C() { return _countof( m_rgbFingerprint ); } - static size_t CbitFingerprint() { return sizeof( m_rgbFingerprint[0] ) * 8; } + static size_t C() { return 4; } + static size_t CbitFingerprint() { return sizeof( CBucket ) * CHAR_BIT / C(); } + static WORD WFingerprintMask() { return (WORD)( ( 1 << CbitFingerprint() ) - 1 ); } BOOL FTryAdd( _In_ const WORD wFingerprint, _Out_opt_ WORD* const pwFingerprintReplaced = NULL ) @@ -4312,11 +4312,13 @@ class THashedLRUKCache *pwFingerprintReplaced = 0; } - for ( int i = 0; i < _countof( m_rgbFingerprint ); i++ ) + const QWORD qwFingerprints = QwFingerprints(); + + for ( int i = 0; i < C(); i++ ) { - if ( m_rgbFingerprint[ i ] == 0 ) + if ( WFingerprint( qwFingerprints, i ) == 0 ) { - m_rgbFingerprint[ i ] = (BYTE)wFingerprint; + SetFingerprint( qwFingerprints, i, wFingerprint ); return fTrue; } } @@ -4324,10 +4326,10 @@ class THashedLRUKCache if ( pwFingerprintReplaced ) { const size_t iRandom = rand(); - const size_t iVictim = iRandom % _countof( m_rgbFingerprint ); + const size_t iVictim = iRandom % C(); - *pwFingerprintReplaced = m_rgbFingerprint[ iVictim ]; - m_rgbFingerprint[ iVictim ] = (BYTE)wFingerprint; + *pwFingerprintReplaced = WFingerprint( qwFingerprints, iVictim ); + SetFingerprint( qwFingerprints, iVictim, wFingerprint ); } return fFalse; @@ -4335,11 +4337,13 @@ class THashedLRUKCache 
BOOL FTryRemove( _In_ const WORD wFingerprint ) { - for ( int i = 0; i < _countof( m_rgbFingerprint ); i++ ) + const QWORD qwFingerprints = QwFingerprints(); + + for ( int i = 0; i < C(); i++ ) { - if ( m_rgbFingerprint[ i ] == wFingerprint ) + if ( WFingerprint( qwFingerprints, i ) == wFingerprint ) { - m_rgbFingerprint[ i ] = 0; + SetFingerprint( qwFingerprints, i, 0 ); return fTrue; } } @@ -4349,9 +4353,11 @@ class THashedLRUKCache BOOL FContains( _In_ const WORD wFingerprint ) { - for ( int i = 0; i < _countof( m_rgbFingerprint ); i++ ) + const QWORD qwFingerprints = QwFingerprints(); + + for ( int i = 0; i < C(); i++ ) { - if ( m_rgbFingerprint[ i ] == wFingerprint ) + if ( WFingerprint( qwFingerprints, i ) == wFingerprint ) { return fTrue; } @@ -4362,7 +4368,47 @@ class THashedLRUKCache private: - BYTE m_rgbFingerprint[ 4 ]; + QWORD QwFingerprints() const + { + QWORD qwFingerprints; + memcpy( &qwFingerprints, m_rgbFingerprint, sizeof( m_rgbFingerprint ) ); + + return qwFingerprints; + } + + WORD WFingerprint( _In_ const QWORD qwFingerprints, _In_ const size_t i ) + { + return ( qwFingerprints >> ( i * CbitFingerprint() ) ) & WFingerprintMask(); + } + + void SetFingerprint( _In_ const QWORD qwFingerprints, _In_ const size_t i, _In_ const WORD wFingerprint ) + { + QWORD qwFingerprintsT = qwFingerprints & ~( QWORD( WFingerprintMask() << ( i * CbitFingerprint() ) ) ); + qwFingerprintsT = qwFingerprintsT | ( QWORD( wFingerprint & WFingerprintMask() ) << ( i * CbitFingerprint() ) ); + + memcpy( m_rgbFingerprint, &qwFingerprintsT, sizeof( m_rgbFingerprint ) ); + } + + private: + + union + { + BYTE m_rgbFingerprint[ 5 ]; + + struct + { + DWORD m_wFingerprint0 : 10; + DWORD m_wFingerprint1 : 10; + DWORD m_wFingerprint2 : 10; + }; + + struct + { + BYTE _ : 8; + DWORD __ : 22; + DWORD m_wFingerprint3 : 10; + }; + }; }; #include From ad8d3ce8c03446f8b85dd57ee7bdfa1620625ea5 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 8 Sep 2022 15:11:08 +0000 Subject: 
[PATCH 016/102] ESE Block Cache: try to reduce slab access overhead in RequestFinalizeRead In prod we are still spending a huge amount of time in RequestFinalizeRead accessing slabs. This is true even though we don't cache reads. We also don't seem to be hitting the cache very much because ErrVerifyCluster is a very small percentage of the cost. This change causes each CRequest to remember if it had a cache hit or miss. We can then use the cache hit flag to rule out accesses to the cache. This could allow us to avoid accessing the slab. This is more certain than using the presence filter. If this does work then we may need to look more into the effectiveness of the presence filter. [Substrate:7464b6e75b808cf65e14ae076908f3474fd8ec5f] --- .../src/os/blockcache/_hashedlrukcache.hxx | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 7641ccc9..7122156a 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -132,7 +132,9 @@ class THashedLRUKCache CClusterWriteCompletionContext( this, (CMeteredSection::Group)0 ), CClusterWriteCompletionContext( this, (CMeteredSection::Group)1 ) }, + m_fCacheMiss( fFalse ), m_cCachedFileIO( 0 ), + m_fCacheHit( fFalse ), m_cCachingFileIO( 0 ), m_iorl( this ), m_pfnIORangeLockAcquired( NULL ), @@ -151,6 +153,8 @@ class THashedLRUKCache ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } typename CHashedLRUKCachedFileTableEntry::CIORangeLockBase* Piorl() { return &m_iorl; } BOOL FIOCompleted() const { return m_msIO.FEmpty(); } + BOOL FCacheMiss() const { return m_fCacheMiss; } + BOOL FCacheHit() const { return m_fCacheHit; } COffsets OffsetsForIO() const { @@ -252,6 +256,7 @@ class THashedLRUKCache DWORD_PTR( this ), ClusterReadHandoff_ ) ); + m_fCacheHit = fTrue; m_cCachingFileIO++; HandleError: @@ -273,6 +278,7 @@ 
class THashedLRUKCache Call( ErrRead( Pcfte()->Pff(), ibOffset, cbData, pbData, iomCacheMiss ) ); + m_fCacheMiss = fTrue; m_cCachedFileIO++; HandleError: @@ -552,7 +558,9 @@ class THashedLRUKCache const CClusterWriteCompletionContext m_rgcwcc[ 2 ]; CMeteredSection m_msIO; + BOOL m_fCacheMiss; int m_cCachedFileIO; + BOOL m_fCacheHit; int m_cCachingFileIO; typename CCountedInvasiveList::CElement m_ileRequestsByThread; typename CCountedInvasiveList::CElement m_ileIOs; @@ -4749,6 +4757,7 @@ class THashedLRUKCache ERR ErrIsPossiblyCached( _In_ CHashedLRUKCachedFileTableEntry* pcfte, _In_ const QWORD ibCachedBlock, + _In_ const BOOL fKnownNotCached, _Out_ QWORD* const pibSlab, _Out_ CCachedBlockId* const pcbid, _Out_ BOOL* const pfPossiblyCached ); @@ -5702,7 +5711,7 @@ ERR THashedLRUKCache::ErrInvalidate( _In_ const VolumeId volumeid, // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( pcfte, ibCachedBlock, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( pcfte, ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached ) ); // if the cached block is not possibly cached then skip this offset @@ -8457,7 +8466,7 @@ void THashedLRUKCache::RequestRead( _In_ CRequest* const preq // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached ) ); // if the cached block is possibly cached then determine if it is cached @@ -8583,7 +8592,7 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, !prequest->FCacheHit(), &ibSlab, &cbid, &fPossiblyCached ) ); // if the cached block is possibly cached then 
determine if it is cached. otherwise, if we want to cache it // then ensure that we check to see if it is already cached @@ -8735,7 +8744,7 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached ) ); // if the cached block is possibly cached then determine if it is cached. otherwise, if we want to cache it // then ensure that we check to see if it is already cached @@ -8966,6 +8975,7 @@ HandleError: template ERR THashedLRUKCache::ErrIsPossiblyCached( _In_ CHashedLRUKCachedFileTableEntry* pcfte, _In_ const QWORD ibCachedBlock, + _In_ const BOOL fKnownNotCached, _Out_ QWORD* const pibSlab, _Out_ CCachedBlockId* const pcbid, _Out_ BOOL* const pfPossiblyCached ) @@ -8975,28 +8985,26 @@ ERR THashedLRUKCache::ErrIsPossiblyCached( _In_ CHashedLRUKCachedFileTab BOOL fPossiblyCached = fFalse; *pibSlab = 0; - new( pcbid ) CCachedBlockId(); *pfPossiblyCached = fFalse; // compute the cached block id for this offset - const CCachedBlockId cbid( pcfte->Volumeid(), - pcfte->Fileid(), - pcfte->Fileserial(), - (CachedBlockNumber)( ibCachedBlock / cbCachedBlock ) ); + new( pcbid ) CCachedBlockId( pcfte->Volumeid(), + pcfte->Fileid(), + pcfte->Fileserial(), + (CachedBlockNumber)( ibCachedBlock / cbCachedBlock ) ); // determine the slab that should hold this cached block - Call( m_pcbsmHash->ErrGetSlabForCachedBlock( cbid, &ibSlab ) ); + Call( m_pcbsmHash->ErrGetSlabForCachedBlock( *pcbid, &ibSlab ) ); // determine if it is possible that we have this cached block in the cache - fPossiblyCached = m_pcbpf->FPossiblyContains( ibSlab, cbid ); + fPossiblyCached = !fKnownNotCached && m_pcbpf->FPossiblyContains( ibSlab, *pcbid ); // return the results *pibSlab = ibSlab; - new( pcbid ) CCachedBlockId( cbid.Volumeid(), 
cbid.Fileid(), cbid.Fileserial(), cbid.Cbno() ); *pfPossiblyCached = fPossiblyCached; HandleError: From 28fd93edef863c8b76a3a2da94310825e790d2fd Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Fri, 9 Sep 2022 01:19:13 +0000 Subject: [PATCH 017/102] Shrink should succeed with sdrNoLowAvailSpace when it can't grow the DB, not fail with JET_errOutOfDatabaseSpace FShrinkIsRunning() means that the engine is currently running code under Shrink. FShrinkIsActive() means that, in addition to Shrink being running, it's also still actively trying to move pages to lower offsets to truncate the file. The transition from FShrinkIsActive() true to false (while still keeping FShrinkIsRunning() true) happens when we're faced with a scenario where not growing the database could lead to leaked space, so we give up shrinking at that point. Recently, we saw cases where Shrink was being disabled and tried again to grow the DB on the same stack. That caused Shrink to fail with JET_errOutOfDatabaseSpace, instead of succeeding and bailing out more cleanly with sdrNoLowAvailSpace . This bug only affects MCDBs, which have a max DB size set, and doesn't have any practical impact, other than just optics listing failed Shrink runs, when they should have been reported as benign cases where we don't have space below the target to allocate space anymore. The fix here is to return the benign errSPNoSpaceBelowShrinkTarget error even if Shrink is inactive, but still running. 
[Substrate:cc0cf8193e3da0101994d130f2c8c62bd4a17a92] --- dev/ese/src/ese/space.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index 23ec47f2..c6482020 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -11860,7 +11860,7 @@ LOCAL ERR ErrSPIExtendDB( // If Shrink is running, signal it to bail and let the database grow IFF // if we can violate the max DB size constraint, which is a proxy for when // not doing so would lead to space leaks. - if ( g_rgfmp[pfucbRoot->ifmp].FShrinkIsActive() ) + if ( g_rgfmp[pfucbRoot->ifmp].FShrinkIsRunning() ) { if ( fMayViolateMaxSize ) { From 192cd7b1c259c562ae812d21553b6ab4920af6f7 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 9 Sep 2022 15:08:03 +0000 Subject: [PATCH 018/102] ESE Block Cache: allow uncached file access to succeed if the underlying file does not support FileIdInfo During testing, it was discovered that accessing files in certain cases (e.g. \\tsclient\c\foo.txt) failed because GetFileInformationByHandleEx with FileIdInfo was failing with ERROR_INVALID_PARAMETER when the file id was being queried for the file while it was being opened. This change traps this case and causes an invalid file id to be returned. This case was already established to be valid for a file that cannot ever participate in EBC caching. 
[Substrate:b8ce56bd1da9fef79a88d092da7520abce640908] --- .../src/os/blockcache/_fileidentification.hxx | 86 +++++++++++-------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/dev/ese/src/os/blockcache/_fileidentification.hxx b/dev/ese/src/os/blockcache/_fileidentification.hxx index 9ec3a873..06eb25ce 100644 --- a/dev/ese/src/os/blockcache/_fileidentification.hxx +++ b/dev/ese/src/os/blockcache/_fileidentification.hxx @@ -426,62 +426,72 @@ ERR TFileIdentification::ErrGetFileId( _In_z_ const WCHAR* const wszP sizeof( fileIdInfo ) ); if ( !fSuccess ) { - Error( ErrGetLastError() ); + if ( GetLastError() == ERROR_INVALID_PARAMETER ) + { + // exception: if we get invalid parameter then presume file id is not supported for this file + } + else + { + Error( ErrGetLastError() ); + } } - // get the volume id - - volumeid = (VolumeId)fileIdInfo.VolumeSerialNumber; + if ( fSuccess ) + { + // get the volume id - // defend against illegal values for the volumeid + volumeid = (VolumeId)fileIdInfo.VolumeSerialNumber; - if ( volumeid == volumeidInvalid ) - { - Error( ErrBlockCacheInternalError( wszPath, "InvalidVolumeId" ) ); - } + // defend against illegal values for the volumeid - // try to get the volume handle for the volume id. this will only work if it is a local volume + if ( volumeid == volumeidInvalid ) + { + Error( ErrBlockCacheInternalError( wszPath, "InvalidVolumeId" ) ); + } - err = ErrOpenVolumeById( volumeid, &pvhce ); - err = err == JET_errInvalidPath ? JET_errSuccess : err; - Call( err ); + // try to get the volume handle for the volume id. this will only work if it is a local volume - // get the file id if requested + err = ErrOpenVolumeById( volumeid, &pvhce ); + err = err == JET_errInvalidPath ? 
JET_errSuccess : err; + Call( err ); - if ( pfileid ) - { - // if this is a local volume then return a normal file id + // get the file id if requested - if ( pvhce ) + if ( pfileid ) { - // extract the file id + // if this is a local volume then return a normal file id - fileid = *( (FileId*)fileIdInfo.FileId.Identifier ); + if ( pvhce ) + { + // extract the file id - // defend against illegal values for the file id + fileid = *( (FileId*)fileIdInfo.FileId.Identifier ); - if ( fileid == fileidInvalid ) - { - Error( ErrBlockCacheInternalError( wszPath, "InvalidFileId" ) ); - } + // defend against illegal values for the file id + + if ( fileid == fileidInvalid ) + { + Error( ErrBlockCacheInternalError( wszPath, "InvalidFileId" ) ); + } - // defend against truncation of the file id because we cannot handle that + // defend against truncation of the file id because we cannot handle that - FILE_ID_128 fileId; - memset( fileId.Identifier, 0, _cbrg( fileId.Identifier ) ); - *( (FileId*)fileId.Identifier ) = fileid; - if ( memcmp( fileIdInfo.FileId.Identifier, fileId.Identifier, sizeof( fileId.Identifier ) ) ) - { - Error( ErrBlockCacheInternalError( wszPath, "TruncatedFileId" ) ); + FILE_ID_128 fileId; + memset( fileId.Identifier, 0, _cbrg( fileId.Identifier ) ); + *( (FileId*)fileId.Identifier ) = fileid; + if ( memcmp( fileIdInfo.FileId.Identifier, fileId.Identifier, sizeof( fileId.Identifier ) ) ) + { + Error( ErrBlockCacheInternalError( wszPath, "TruncatedFileId" ) ); + } } - } - // if this is not a local volume then return an invalid file id to prevent caching + // if this is not a local volume then return an invalid file id to prevent caching - else - { - volumeid = volumeidInvalid; - fileid = fileidInvalid; + else + { + volumeid = volumeidInvalid; + fileid = fileidInvalid; + } } } From 16550e44a4cf4ed3f1a446c25f8c321f8aa03c54 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 9 Sep 2022 15:08:51 +0000 Subject: [PATCH 019/102] ESE Block Cache: fix Cuckoo 
Filter issue with standby list The cuckoo filter reference implementation contained a standby list to hold any item that couldn't fit in the main table. I implemented this because this is the kind of thing that isn't added without a good reason. The problem is that while we faithfully add/remove items from the standby list, we forget to consult it on the set membership check! So we can return a false negative for an item in the standby list which is not allowed. This fix consults the standby list during the set membership check. [Substrate:dbc2e5d499d2ff297f07ce3ea28b59cd6c202cb1] --- .../src/os/blockcache/_hashedlrukcache.hxx | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 7122156a..735f8f47 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -4191,6 +4191,13 @@ class THashedLRUKCache } } + // if we still haven't found it then check the standby list + + if ( !fSucceeded ) + { + fSucceeded = FIsStandbyItemPresent( dwHash ); + } + return fSucceeded; } @@ -4257,6 +4264,18 @@ class THashedLRUKCache return fSucceeded; } + BOOL FIsStandbyItemPresent( _In_ const DWORD dwHash ) + { + BOOL fSucceeded = fFalse; + + for ( int iStandby = 0; !fSucceeded && iStandby < m_cStandby; iStandby++ ) + { + fSucceeded = (DWORD)AtomicRead( (LONG*)&m_rgdwStandby[ iStandby ] ) == dwHash; + } + + return fSucceeded; + } + void RetireStandbyItem() { DWORD dwHash = 0; From d7b3cecab1787cb5c0d429fb9967c7c490d45fc9 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Fri, 9 Sep 2022 16:31:09 +0000 Subject: [PATCH 020/102] Turn Shrink archiving off [Substrate:40691275fbb2f5a0103f73cf7d646019e7e9991f] --- dev/ese/src/ese/dbshrink.cxx | 12 ++++++------ dev/ese/src/ese/sysparamtable.g.cxx | 2 +- dev/ese/src/noncore/interop/exceptions.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index f8ec81ac..5bcb388d 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -890,6 +890,12 @@ ERR ErrSHKShrinkDbFromEof( Call( ErrSHKIShrinkEofTracingBegin( pinst->m_pfsapi, g_rgfmp[ ifmp ].WszDatabaseName(), &pcprintfShrinkTraceRaw ) ); + // First, delete any previously saved shrink archive files. + if ( !BoolParam( pinst, JET_paramFlight_EnableShrinkArchiving ) ) + { + (void)ErrIODeleteShrinkArchiveFiles( ifmp ); + } + // Bail out as early as possible if the database is already sufficiently small. if ( pfmp->CpgOfCb( pfmp->CbOwnedFileSize() ) <= pfmp->CpgShrinkDatabaseSizeLimit() ) { @@ -899,12 +905,6 @@ ERR ErrSHKShrinkDbFromEof( Assert( !BoolParam( JET_paramEnableViewCache ) ); - // First, delete any previously saved shrink archive files. - if ( !BoolParam( pinst, JET_paramFlight_EnableShrinkArchiving ) ) - { - (void)ErrIODeleteShrinkArchiveFiles( ifmp ); - } - IFMP ifmpDummy; Call( ErrDBOpenDatabase( ppib, pfmp->WszDatabaseName(), &ifmpDummy, JET_bitDbExclusive ) ); Assert( pfmp->FExclusiveBySession( ppib ) ); diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index e661d2bc..f150841e 100644 --- a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -102,7 +102,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramFlight_LowMetedOpsThreshold, CJetParam::typeInteger, 0, 0, 0, 1, 0, 1024, 40), NORMAL_PARAM(JET_paramFlight_MetedOpStarvedThreshold, CJetParam::typeInteger, 0, 0, 0, 1, 50, 2147483647, 3000), NORMAL_PARAM(JET_paramFlight_MaxRBSBuffers, CJetParam::typeInteger, 0, 0, 0, 0, 1, 2147483647, 20), - NORMAL_PARAM(JET_paramFlight_EnableShrinkArchiving, CJetParam::typeBoolean, 1, 0, 0, 0, 0, 1, 1), + NORMAL_PARAM(JET_paramFlight_EnableShrinkArchiving, CJetParam::typeBoolean, 1, 0, 0, 0, 0, 1, 0), NORMAL_PARAM(JET_paramFlight_EnableBackupDuringRecovery, CJetParam::typeBoolean, 1, 0, 0, 0, 0, -1, 0), 
NORMAL_PARAM(JET_paramFlight_RBSRollIntervalSec, CJetParam::typeInteger, 1, 0, 0, 0, 0, 604800, 43200), NORMAL_PARAM(JET_paramFlight_RBSMaxRequiredRange, CJetParam::typeInteger, 1, 0, 0, 0, 0, 10000, 1000), diff --git a/dev/ese/src/noncore/interop/exceptions.h b/dev/ese/src/noncore/interop/exceptions.h index 43d279b5..ebd75e5d 100644 --- a/dev/ese/src/noncore/interop/exceptions.h +++ b/dev/ese/src/noncore/interop/exceptions.h @@ -5691,7 +5691,7 @@ namespace Isam public ref class IsamDatabaseAttachedForRecoveryException : public IsamUsageException { public: - IsamDatabaseAttachedForRecoveryException() : IsamUsageException( "Database is attached but only for recovery. It must be explicitly attached before it can be opened. ", JET_errDatabaseAttachedForRecovery) + IsamDatabaseAttachedForRecoveryException() : IsamUsageException( "Database is attached but only for recovery. It must be explicitly attached before it can be opened.", JET_errDatabaseAttachedForRecovery) { } From 19ded4de02edd2255202f62c62d5ed0dc382fb5a Mon Sep 17 00:00:00 2001 From: Vakishan Date: Fri, 9 Sep 2022 21:44:34 +0000 Subject: [PATCH 021/102] Fix bug when pvpage is null due to some flags being set but not compress or dehydrate flags [Substrate:77921ebeb87905b214b643dc767869412d76f568] --- dev/ese/src/ese/rbsdump.cxx | 2 +- dev/ese/src/ese/revertsnapshot.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/ese/src/ese/rbsdump.cxx b/dev/ese/src/ese/rbsdump.cxx index 4289edc1..3de00811 100644 --- a/dev/ese/src/ese/rbsdump.cxx +++ b/dev/ese/src/ese/rbsdump.cxx @@ -232,7 +232,7 @@ VOID RBSRecToSz( const RBSRecord *prbsrec, __out_bcount(cbRBSRec) PSTR szRBSRec, dataImage.SetPv( prbsdbpgrec->m_rgbData ); dataImage.SetCb( prbsrec->m_usRecLength - sizeof(RBSDbPageRecord) ); - if ( prbsdbpgrec->m_fFlags ) + if ( prbsdbpgrec->m_fFlags & ( fRBSPreimageCompressed | fRBSPreimageDehydrated ) ) { pbDataDecompressed = (BYTE *)PvOSMemoryPageAlloc( g_cbPageFromSnapshot, NULL ); if ( 
pbDataDecompressed && diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index c29ee877..d8b9a6ec 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -6083,7 +6083,7 @@ ERR CRBSRevertContext::ErrApplyRBSRecord( RBSRecord* prbsrec, BOOL fCaptureDbHdr pvPage = PvOSMemoryPageAlloc( m_cbDbPageSize, NULL ); Alloc( pvPage ); - if ( prbsdbpgrec->m_fFlags ) + if ( prbsdbpgrec->m_fFlags & ( fRBSPreimageCompressed | fRBSPreimageDehydrated ) ) { Call( ErrRBSDecompressPreimage( dataImage, m_cbDbPageSize, (BYTE*) pvPage, prbsdbpgrec->m_pgno, prbsdbpgrec->m_fFlags ) ); } From a18f45e7b59f238c94779e7001e27b2fbbc8856c Mon Sep 17 00:00:00 2001 From: Vakishan Date: Sat, 10 Sep 2022 00:41:31 +0000 Subject: [PATCH 022/102] Add dbtime to RBS root page records and remove duplicates from being applied in a loop using dbtime to uniquely identify the record [Substrate:ab598789a5d286a2e3b6e9419a5e346fcd79a3b0] --- dev/ese/src/ese/_log/logredo.cxx | 6 +- dev/ese/src/ese/dbshrink.cxx | 2 +- dev/ese/src/ese/rbsdump.cxx | 11 ++- dev/ese/src/ese/revertsnapshot.cxx | 94 +++++++++++++++++-------- dev/ese/src/ese/space.cxx | 2 +- dev/ese/src/inc/revertsnapshot.h | 21 ++++-- dev/ese/src/inc/revertsnapshotrecords.h | 11 ++- 7 files changed, 100 insertions(+), 47 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 6b9aaf7a..bf81dec3 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -7335,7 +7335,7 @@ ERR LOG::ErrLGRIRedoOperation( LR *plr ) // This is because we will apply this root page move record only if we have preimage of the root. 
if ( g_rgfmp[ ifmp ].FRBSOn() ) { - CallR( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( dbid, 0, plrcreatemefdp->le_pgno ) ); + CallR( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( dbid, 0, plrcreatemefdp->le_pgno, plrcreatemefdp->le_dbtime ) ); } break; @@ -7385,7 +7385,7 @@ ERR LOG::ErrLGRIRedoOperation( LR *plr ) // This is because we will apply this root page move record only if we have preimage of the root. if ( g_rgfmp[ ifmp ].FRBSOn() ) { - CallR( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( dbid, 0, pgnoFDP ) ); + CallR( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( dbid, 0, pgnoFDP, plrcreatesefdp->le_dbtime ) ); } break; @@ -10965,7 +10965,7 @@ ERR LOG::ErrLGRIRedoRootPageMove( PIB* const ppib, const DBTIME dbtime ) // This is because we will apply this root page move record only if we have preimages of both the source and destination. if ( g_rgfmp[ ifmp ].FRBSOn() ) { - Call( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( g_rgfmp[ ifmp ].Dbid(), rm.pgnoFDP, rm.pgnoNewFDP ) ); + Call( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( g_rgfmp[ ifmp ].Dbid(), rm.pgnoFDP, rm.pgnoNewFDP, rm.dbtimeAfter ) ); } HandleError: diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index 5bcb388d..13589b42 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -2239,7 +2239,7 @@ ERR ErrSHKRootPageMove( // This is because we will apply this root page move record only if we have preimages of both the source and destination. if ( g_rgfmp[ ifmp ].FRBSOn() ) { - Call( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( g_rgfmp[ ifmp ].Dbid(), rm.pgnoFDP, rm.pgnoNewFDP ) ); + Call( g_rgfmp[ ifmp ].PRBS()->ErrCaptureRootPageMove( g_rgfmp[ ifmp ].Dbid(), rm.pgnoFDP, rm.pgnoNewFDP, rm.dbtimeAfter ) ); } // Re-open cursors and verify that the move looks consistent. 
diff --git a/dev/ese/src/ese/rbsdump.cxx b/dev/ese/src/ese/rbsdump.cxx index 3de00811..c3dda626 100644 --- a/dev/ese/src/ese/rbsdump.cxx +++ b/dev/ese/src/ese/rbsdump.cxx @@ -166,6 +166,8 @@ const char* const szEmptyPages2 = "EmptyPg2 "; const char* const szRootPageMove = "PageMoveR"; +const char* const szRootPageMove2 = "PgMoveR2 "; + const char * szRBSRecUnknown = "*UNKNOWN*"; const INT cbRBSRecBuf = 1024 + cbFormattedDataMax; @@ -182,6 +184,7 @@ const char * SzRBSRec( BYTE bRBSRecType ) case rbsrectypeDbEmptyPages: return szEmptyPages; case rbsrectypeDbEmptyPages2: return szEmptyPages2; case rbsrectypeRootPageMove: return szRootPageMove; + case rbsrectypeRootPageMove2: return szRootPageMove2; default: return szRBSRecUnknown; } } @@ -289,12 +292,14 @@ VOID RBSRecToSz( const RBSRecord *prbsrec, __out_bcount(cbRBSRec) PSTR szRBSRec, break; } case rbsrectypeRootPageMove: + case rbsrectypeRootPageMove2: { - RBSRootPageMoveRecord* prbsrootpgmoverec = (RBSRootPageMoveRecord*)prbsrec; - OSStrCbFormatA( rgchBuf, sizeof( rgchBuf ), " [%u:%lu:%lu]", + RBSRootPageMove2Record* prbsrootpgmoverec = (RBSRootPageMove2Record*)prbsrec; + OSStrCbFormatA( rgchBuf, sizeof( rgchBuf ), " [%u:%lu:%lu], dbtime:%I64x", (DBID) prbsrootpgmoverec->m_dbid, (ULONG) prbsrootpgmoverec->m_pgnoSrc, - (ULONG) prbsrootpgmoverec->m_pgnoDest ); + (ULONG) prbsrootpgmoverec->m_pgnoDest, + (DBTIME) prbsrootpgmoverec->m_dbtime ); OSStrCbAppendA( szRBSRec, cbRBSRec, rgchBuf ); break; } diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index d8b9a6ec..bd54ae52 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -2002,18 +2002,19 @@ ERR CRevertSnapshot::ErrCaptureDbAttach( WCHAR* wszDatabaseName, const DBID dbid return ErrCaptureRec( &dbRec, &dataRec, &dummy ); } -ERR CRevertSnapshot::ErrCaptureRootPageMove( const DBID dbid, const PGNO pgnoSrc, const PGNO pgnoDest ) +ERR CRevertSnapshot::ErrCaptureRootPageMove( const DBID dbid, const 
PGNO pgnoSrc, const PGNO pgnoDest, const DBTIME dbtime ) { RBS_POS dummy; DATA dataDummy; dataDummy.Nullify(); - RBSRootPageMoveRecord rootpagemoverec; - rootpagemoverec.m_bRecType = rbsrectypeRootPageMove; - rootpagemoverec.m_usRecLength = sizeof( RBSRootPageMoveRecord ); + RBSRootPageMove2Record rootpagemoverec; + rootpagemoverec.m_bRecType = rbsrectypeRootPageMove2; + rootpagemoverec.m_usRecLength = sizeof( RBSRootPageMove2Record ); rootpagemoverec.m_dbid = dbid; rootpagemoverec.m_pgnoSrc = pgnoSrc; rootpagemoverec.m_pgnoDest = pgnoDest; + rootpagemoverec.m_dbtime = dbtime; return ErrCaptureRec( &rootpagemoverec, &dataDummy, &dummy ); } @@ -4561,9 +4562,9 @@ ERR CRBSDatabaseRevertContext::ErrAddPage( void* pvPage, PGNO pgno, BOOL fReplac // Add root page record to the array of records. // -ERR CRBSDatabaseRevertContext::ErrAddRootPageRecord( BOOL fDeleteOperation, PGNO pgnoSrc, PGNO pgnoDest ) +ERR CRBSDatabaseRevertContext::ErrAddRootPageRecord( const BOOL fDeleteOperation, const PGNO pgnoSrc, const PGNO pgnoDest, const DBTIME dbtime ) { - CRootPageRecord rootpagerec( fDeleteOperation, pgnoSrc, pgnoDest ); + CRootPageRecord rootpagerec( fDeleteOperation, pgnoSrc, pgnoDest, dbtime ); CArray< CRootPageRecord >::ERR errArray = CArray< CRootPageRecord >::ERR::errSuccess; @@ -4572,7 +4573,11 @@ ERR CRBSDatabaseRevertContext::ErrAddRootPageRecord( BOOL fDeleteOperation, PGNO m_rgrootpagerec = new CArray< CRootPageRecord >( 32 ); } - errArray = m_rgrootpagerec->ErrSetEntry( m_rgrootpagerec->Size(), rootpagerec ); + // Only add entry to root page records if it doesn't exist already + if ( m_rgrootpagerec->SearchLinear( rootpagerec, CRBSDatabaseRevertContext::ICRBSDatabaseRootPageRecordEquals ) == CArray< CRootPageRecord >::iEntryNotFound ) + { + errArray = m_rgrootpagerec->ErrSetEntry( m_rgrootpagerec->Size(), rootpagerec ); + } if ( errArray != CArray< CRootPageRecord >::ERR::errSuccess ) { @@ -5012,6 +5017,20 @@ INLINE INT __cdecl 
CRBSDatabaseRevertContext::ICRBSDatabaseRevertContextPgEquals return ( ( ppg1->PgNo() == ppg2->PgNo() ) ? 0 : ( ( ppg1->PgNo() < ppg2->PgNo() ) ? -1 : +1 ) ); } +// Equals method to say if both root page records are the same or not. +// +INLINE INT __cdecl CRBSDatabaseRevertContext::ICRBSDatabaseRootPageRecordEquals( const CRootPageRecord* prootpagerecord1, const CRootPageRecord* prootpagerecord2 ) +{ + Assert( prootpagerecord1 ); + Assert( prootpagerecord2 ); + + return ( ( + prootpagerecord1->PgnoSrc() == prootpagerecord2->PgnoSrc() && + prootpagerecord1->PgnoDest() == prootpagerecord2->PgnoDest() && + prootpagerecord1->FDeleteOperation() == prootpagerecord2->FDeleteOperation() && + prootpagerecord1->Dbtime() == prootpagerecord2->Dbtime() ) ? 0 : 1 ); +} + void CRBSDatabaseRevertContext::OsWriteIoComplete( const ERR errIo, IFileAPI* const pfapi, @@ -5916,13 +5935,13 @@ ERR CRBSRevertContext::ErrAddRevertedNewPage( DBID dbid, PGNO pgnoRevertNew, con // Add root page record to the array of records for the given database. // -ERR CRBSRevertContext::ErrAddRootPageRecord( DBID dbid, BOOL fDeleteOperation, PGNO pgnoSrc, PGNO pgnoDest ) +ERR CRBSRevertContext::ErrAddRootPageRecord( const DBID dbid, const BOOL fDeleteOperation, const PGNO pgnoSrc, const PGNO pgnoDest, const DBTIME dbtime ) { Assert( m_mpdbidirbsdbrc[ dbid ] != irbsdbrcInvalid ); Assert( m_mpdbidirbsdbrc[ dbid ] <= m_irbsdbrcMaxInUse ); Assert( m_rgprbsdbrcAttached[ m_mpdbidirbsdbrc[ dbid ] ] ); - return m_rgprbsdbrcAttached[ m_mpdbidirbsdbrc[ dbid ] ]->ErrAddRootPageRecord( fDeleteOperation, pgnoSrc, pgnoDest ); + return m_rgprbsdbrcAttached[ m_mpdbidirbsdbrc[ dbid ] ]->ErrAddRootPageRecord( fDeleteOperation, pgnoSrc, pgnoDest, dbtime ); } // Checks whether we continue applying RBS, taking any required actions. 
@@ -6063,22 +6082,13 @@ ERR CRBSRevertContext::ErrApplyRBSRecord( RBSRecord* prbsrec, BOOL fCaptureDbHdr // At the end of the snapshot, we will go through and apply the flag while going through the records in the reverse order. // We need to do this in reverse order since we allow shrink/table creation to happen in the snapshot window. // So we might have to move the flag from one root page to another. - if ( prbsdbpgrec->m_fFlags & fRBSDeletedTableRootPage && fRevertStateRootPageRecords ) - { - Call( ErrAddRootPageRecord( prbsdbpgrec->m_dbid, fTrue, prbsdbpgrec->m_pgno, pgnoNull ) ); - } - - // When we are starting snapshot in JET_revertstateRootPageRecords, all we need is to capture the fact that root page record needs to be marked with FDPDeleteFlag and - // the fact that we saw a preimage for this page so that root page move record can decide if it needs to be applied. - // All the preimage applying work should have already been completed. - if ( fRevertStateRootPageRecords ) - { - SetPageCaptured( prbsdbpgrec->m_dbid, prbsdbpgrec->m_pgno ); - return JET_errSuccess; - } + BOOL fAddRootPageRecord = prbsdbpgrec->m_fFlags & fRBSDeletedTableRootPage && fRevertStateRootPageRecords; // If either revert always flag is set or if we have not already captured page preimage to revert to, capture the page record. 
- if ( prbsdbpgrec->m_fFlags & fRBSPreimageRevertAlways || !fPageAlreadyCaptured ) + BOOL fAddDbPageRecord = ( prbsdbpgrec->m_fFlags & fRBSPreimageRevertAlways || !fPageAlreadyCaptured ) && !fRevertStateRootPageRecords; + + + if ( fAddDbPageRecord || fAddRootPageRecord ) { pvPage = PvOSMemoryPageAlloc( m_cbDbPageSize, NULL ); Alloc( pvPage ); @@ -6097,15 +6107,36 @@ ERR CRBSRevertContext::ErrApplyRBSRecord( RBSRecord* prbsrec, BOOL fCaptureDbHdr CPAGE cpage; cpage.LoadPage( ifmpNil, prbsdbpgrec->m_pgno, pvPage, m_cbDbPageSize ); - cpage.PreparePageForWrite( CPAGE::PageFlushType::pgftUnknown, fTrue, fTrue ); - // We will check the root page for fPageFDPDelete, if it is a root page and fPageFDPDelete is not set on the preimage and - // no other preimage was captured as part of this snapshot. If one was captured, we should have done the check for fPageFDPDelete then. - fCheckPageFDPRootDelete = cpage.FRootPage() && !cpage.FPageFDPDelete() && !fPageAlreadyCaptured; + if ( fAddRootPageRecord ) + { + Call( ErrAddRootPageRecord( prbsdbpgrec->m_dbid, fTrue, prbsdbpgrec->m_pgno, pgnoNull, cpage.Dbtime() ) ); + cpage.UnloadPage(); + OSMemoryPageFree( pvPage ); + pvPage = NULL; + } + else + { + Assert( fAddDbPageRecord ); + cpage.PreparePageForWrite( CPAGE::PageFlushType::pgftUnknown, fTrue, fTrue ); + + // We will check the root page for fPageFDPDelete, if it is a root page and fPageFDPDelete is not set on the preimage and + // no other preimage was captured as part of this snapshot. If one was captured, we should have done the check for fPageFDPDelete then. 
+ fCheckPageFDPRootDelete = cpage.FRootPage() && !cpage.FPageFDPDelete() && !fPageAlreadyCaptured; - cpage.UnloadPage(); + cpage.UnloadPage(); - Call( ErrAddPageRecord( pvPage, prbsdbpgrec->m_dbid, prbsdbpgrec->m_pgno, fPageAlreadyCaptured, fCheckPageFDPRootDelete, fFalse, fFalse, m_cbDbPageSize ) ); + Call( ErrAddPageRecord( pvPage, prbsdbpgrec->m_dbid, prbsdbpgrec->m_pgno, fPageAlreadyCaptured, fCheckPageFDPRootDelete, fFalse, fFalse, m_cbDbPageSize ) ); + } + } + + // When we are starting snapshot in JET_revertstateRootPageRecords, all we need is to capture the fact that root page record needs to be marked with FDPDeleteFlag and + // the fact that we saw a preimage for this page so that root page move record can decide if it needs to be applied. + // All the preimage applying work should have already been completed. + if ( fRevertStateRootPageRecords ) + { + SetPageCaptured( prbsdbpgrec->m_dbid, prbsdbpgrec->m_pgno ); + return JET_errSuccess; } break; @@ -6148,8 +6179,9 @@ ERR CRBSRevertContext::ErrApplyRBSRecord( RBSRecord* prbsrec, BOOL fCaptureDbHdr } case rbsrectypeRootPageMove: + case rbsrectypeRootPageMove2: { - RBSRootPageMoveRecord* prbsrootpagemoverec = (RBSRootPageMoveRecord*)prbsrec; + RBSRootPageMove2Record* prbsrootpagemoverec = (RBSRootPageMove2Record*)prbsrec; // We will apply root page record only if we have captured a preimage for the source and destination page. // RootPageMove record is captured whenever shrink does a root page move or when a table is just created (in this case pgnoSrc = 0). 
@@ -6163,7 +6195,7 @@ ERR CRBSRevertContext::ErrApplyRBSRecord( RBSRecord* prbsrec, BOOL fCaptureDbHdr if ( ( prbsrootpagemoverec->m_pgnoSrc == 0 || FPageAlreadyCaptured( prbsrootpagemoverec->m_dbid, prbsrootpagemoverec->m_pgnoSrc ) ) && FPageAlreadyCaptured( prbsrootpagemoverec->m_dbid, prbsrootpagemoverec->m_pgnoDest ) ) { - Call( ErrAddRootPageRecord( prbsrootpagemoverec->m_dbid, fFalse, prbsrootpagemoverec->m_pgnoSrc, prbsrootpagemoverec->m_pgnoDest ) ); + Call( ErrAddRootPageRecord( prbsrootpagemoverec->m_dbid, fFalse, prbsrootpagemoverec->m_pgnoSrc, prbsrootpagemoverec->m_pgnoDest, prbsrootpagemoverec->m_dbtime ) ); } break; diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index c6482020..878ef248 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -3470,7 +3470,7 @@ ERR ErrSPCreate( // and if the database is reverted, delete flag can be cleared accordingly. if ( g_rgfmp[ pfucb->ifmp ].FRBSOn() ) { - Call( g_rgfmp[ pfucb->ifmp ].PRBS()->ErrCaptureRootPageMove( g_rgfmp[ pfucb->ifmp ].Dbid(), 0, pgnoFDP ) ); + Call( g_rgfmp[ pfucb->ifmp ].PRBS()->ErrCaptureRootPageMove( g_rgfmp[ pfucb->ifmp ].Dbid(), 0, pgnoFDP, g_rgfmp[ pfucb->ifmp ].DbtimeLast() ) ); } Assert( !FFUCBSpace( pfucb ) ); diff --git a/dev/ese/src/inc/revertsnapshot.h b/dev/ese/src/inc/revertsnapshot.h index b921b5d5..676afb2c 100644 --- a/dev/ese/src/inc/revertsnapshot.h +++ b/dev/ese/src/inc/revertsnapshot.h @@ -201,7 +201,7 @@ INLINE ULONG IbRBSSegmentOffsetFromFullOffset( DWORD ib ) { return ib & cbRBSS #include "revertsnapshotrecords.h" const ULONG ulRBSVersionMajor = 1; -const ULONG ulRBSVersionMinor = 5; +const ULONG ulRBSVersionMinor = 6; class CRevertSnapshot; @@ -723,7 +723,8 @@ class CRevertSnapshot ERR ErrCaptureRootPageMove( const DBID dbid, const PGNO pgnoSrc, - const PGNO pgnoDest ); + const PGNO pgnoDest, + const DBTIME dbtime ); ERR ErrCaptureEmptyPages( DBID dbid, @@ -1034,14 +1035,17 @@ class CRBSDatabaseRevertContext : public CZeroInit 
PGNO m_pgnoSrc; PGNO m_pgnoDest; + DBTIME m_dbtime; + public: - CRootPageRecord( BOOL fDeleteOperation, PGNO pgnoSrc, PGNO pgnoDest ) : + CRootPageRecord( BOOL fDeleteOperation, PGNO pgnoSrc, PGNO pgnoDest, DBTIME dbtime ) : m_fDeleteOperation( fDeleteOperation ), m_pgnoSrc( pgnoSrc ), - m_pgnoDest( pgnoDest ) {} + m_pgnoDest( pgnoDest ), + m_dbtime( dbtime ) {} - CRootPageRecord() : CRootPageRecord( fFalse, 0, 0 ) {} + CRootPageRecord() : CRootPageRecord( fFalse, 0, 0, dbtimeNil ) {} ~CRootPageRecord() {} @@ -1050,12 +1054,14 @@ class CRBSDatabaseRevertContext : public CZeroInit m_fDeleteOperation = rootpagerec.m_fDeleteOperation; m_pgnoSrc = rootpagerec.m_pgnoSrc; m_pgnoDest = rootpagerec.m_pgnoDest; + m_dbtime = rootpagerec.m_dbtime; return *this; } PGNO PgnoSrc() const { return m_pgnoSrc; } PGNO PgnoDest() const { return m_pgnoDest; } BOOL FDeleteOperation() const { return m_fDeleteOperation; } + DBTIME Dbtime() const { return m_dbtime; } }; class CPageFDPDeleteState @@ -1131,6 +1137,7 @@ class CRBSDatabaseRevertContext : public CZeroInit ERR ErrDBDiskPageFDPRootDelete( void* pvPage, PGNO pgno, BOOL fCheckDiskPageFDPRootDelete, BOOL fOverrideDiskPageFDPRootDelete, USHORT cbDbPageSize, BOOL* pfPgnoFDPRootDelete ); static INT __cdecl ICRBSDatabaseRevertContextCmpPgRec( const CPagePointer* pppg1, const CPagePointer* pppg2 ); static INT __cdecl ICRBSDatabaseRevertContextPgEquals( const CPagePointer* pppg1, const CPagePointer* pppg2 ); + static INT __cdecl ICRBSDatabaseRootPageRecordEquals( const CRootPageRecord* prootpagerecord1, const CRootPageRecord* prootpagerecord2 ); static void OsWriteIoComplete( const ERR errIo, IFileAPI* const pfapi, @@ -1149,7 +1156,7 @@ class CRBSDatabaseRevertContext : public CZeroInit ERR ErrSetDbstateAfterRevert( SIGNATURE* psignRbsHdrFlush ); ERR ErrRBSCaptureDbHdrFromRBS( RBSDbHdrRecord* prbsdbhdrrec, BOOL* pfGivenDbfilehdrCaptured ); ERR ErrAddPage( void* pvPage, PGNO pgno, BOOL fReplaceCached, BOOL fCheckDiskPageFDPRootDelete, BOOL 
fOverrideExistingPageFDPRootDelete, BOOL fSetExistingPageFDPRootDelete, USHORT cbDbPageSize, BOOL* pfPageAddedToCache ); - ERR ErrAddRootPageRecord( BOOL fDeleteOperation, PGNO pgnoSrc, PGNO pgnoDest ); + ERR ErrAddRootPageRecord( const BOOL fDeleteOperation, const PGNO pgnoSrc, const PGNO pgnoDest, const DBTIME dbtime ); ERR ErrApplyRootPageRecords(); ERR ErrCapturePageFDPDeleteState( const LONG lRBSGen, const USHORT cbDbPageSize, _In_ PCWSTR wszDirPath, _In_ PCWSTR wszRBSBaseName ); ERR ErrRBSInitRootPageDeleteState( const LONG lRBSGen, const USHORT cbDbPageSize, _In_ PCWSTR wszDirPath, _In_ PCWSTR wszRBSBaseName, _Out_ CPG* pcpgCached ); @@ -1231,7 +1238,7 @@ class CRBSRevertContext : public CZeroInit ERR ErrApplyRBSRecord( RBSRecord* prbsrec, BOOL fCaptureDBHdrFromRBS, BOOL fDbHeaderOnly, BOOL fRevertStateRootPageRecords, BOOL* pfGivenDbfilehdrCaptured ); ERR ErrCheckApplyRBSContinuation(); ERR ErrAddRevertedNewPage( DBID dbid, PGNO pgnoRevertNew, const BOOL fPageFDPNonRevertableDelete ); - ERR ErrAddRootPageRecord( DBID dbid, BOOL fDeleteOperation, PGNO pgnoSrc, PGNO pgnoDest ); + ERR ErrAddRootPageRecord( const DBID dbid, const BOOL fDeleteOperation, const PGNO pgnoSrc, const PGNO pgnoDest, const DBTIME dbtime ); ERR ErrRevertCheckpointInit(); ERR ErrRevertCheckpointCleanup(); diff --git a/dev/ese/src/inc/revertsnapshotrecords.h b/dev/ese/src/inc/revertsnapshotrecords.h index 14068fcf..60fadeca 100644 --- a/dev/ese/src/inc/revertsnapshotrecords.h +++ b/dev/ese/src/inc/revertsnapshotrecords.h @@ -20,7 +20,8 @@ #define rbsrectypeDbEmptyPages 7 #define rbsrectypeDbEmptyPages2 8 #define rbsrectypeRootPageMove 9 -#define rbsrectypeMax 10 +#define rbsrectypeRootPageMove2 10 +#define rbsrectypeMax 11 PERSISTED struct RBSRecord @@ -82,6 +83,12 @@ struct RBSRootPageMoveRecord : public RBSRecord UnalignedLittleEndian m_pgnoDest; }; +PERSISTED +struct RBSRootPageMove2Record : public RBSRootPageMoveRecord +{ + UnalignedLittleEndian m_dbtime; +}; + PERSISTED struct 
RBSDbEmptyPages2Record : public RBSDbEmptyPagesRecord { @@ -129,6 +136,8 @@ CbRBSRecFixed( BYTE bRecType ) return sizeof( RBSDbEmptyPages2Record ); case rbsrectypeRootPageMove: return sizeof( RBSRootPageMoveRecord ); + case rbsrectypeRootPageMove2: + return sizeof( RBSRootPageMove2Record ); default: Assert( fFalse ); case rbsrectypeNOP: From ff456cce60fc6fc930e498702fdbe233f2d0f5f0 Mon Sep 17 00:00:00 2001 From: Vakishan Date: Sat, 10 Sep 2022 00:53:36 +0000 Subject: [PATCH 023/102] Add additional validation to make sure reverted table being redelete have PageFDPDelete flag set For ExtentFreed records for non-revertable deletes, we will validate the root page has FDP delete flag set if db was reverted i.e., current lgpos redo is before lgposcommitbeforerevert and we are not within the required range. [Substrate:025d113a262b36ed2aa2e3fee62f4fd59f3745bd] --- dev/ese/published/inc/jethdr.w | 5 ++-- dev/ese/src/_errstr/errdata.txt | 1 + dev/ese/src/_res/jetmsg.mc | 6 ++++ dev/ese/src/ese/_log/logredo.cxx | 36 ++++++++++++++++++++++++ dev/ese/src/ese/logapi.cxx | 4 +-- dev/ese/src/ese/revertsnapshot.cxx | 11 ++++++-- dev/ese/src/ese/space.cxx | 3 ++ dev/ese/src/ese/sysparamtable.g.cxx | 8 +++--- dev/ese/src/inc/revertsnapshot.h | 2 +- dev/ese/src/noncore/interop/exceptions.h | 26 +++++++++++++++++ dev/ese/src/noncore/interop/params.h | 3 +- 11 files changed, 92 insertions(+), 13 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index c707e8df..0506852e 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -4091,8 +4091,8 @@ typedef enum #define JET_paramFlight_ExtentPageCountCacheVerifyOnly 114 // Verify values read from the Extent Page Count Cache rather than just returning them. #define JET_paramFlight_EnablePgnoFDPLastSetTime 115 // whether we want to enable setting PgnoPFDSetTime in the system table for a table entry. 
-#define JET_paramFlight_EnableScanCheck2Flags 116 // whether we want to enable logging flags in ScanCheck2 log record. -#define JET_paramFlight_EnableExtentFreed2 117 // whether we want to enable logging ExtentFreed2 LR after the efv upgrade. +#define JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo 116 // whether we want to check if FDP delete flag is set when we redo extent freed LR. +//#define JET_paramFlight_EnableExtentFreed2 117 // whether we want to enable logging ExtentFreed2 LR after the efv upgrade. #define JET_paramFlight_RBSLargeRevertableDeletePages 118 // Large revertable delete size for a table (in pages) beyond which we will track the deletes. #define JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull 119 // If set, we will do a revertable table delete even if NonRevertableTableDelete flag is passed provided NonRevertable delete is failing due to JET_errRBSDeleteTableTooSoon due to time not being set. Note: If JET_bitRevertableTableDeleteIfTooSoon is set, this variant is ignored. @@ -6692,6 +6692,7 @@ typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( #define errRBSCorruptUninitializedRBSRemoved -1946 /* The RBS being loaded is either missing or corrupt and uninitialized, so it has been removed. */ #define JET_errRBSRedeleteFDPUnexpected -1947 /* Indicates that the reverted table marked with delete flag is unexpected. */ #define JET_errRBSRCPageFDPDeleteFileCorrupt -1948 /* The database cannot be reverted to the expected time as we are in apply root page records state but the corresponding file to init the page state is corrupt */ +#define JET_errRBSRedeleteFDPExpected -1949 /* Indicates that the reverted table is expected to be marked with delete flag. 
*/ // begin_PubEsent #define JET_wrnDefragAlreadyRunning 2000 /* Online defrag already running on specified database */ diff --git a/dev/ese/src/_errstr/errdata.txt b/dev/ese/src/_errstr/errdata.txt index c8e50bef..7999b47c 100644 --- a/dev/ese/src/_errstr/errdata.txt +++ b/dev/ese/src/_errstr/errdata.txt @@ -579,6 +579,7 @@ INTERNAL_ERR( -1946, State, errRBSCorruptUninitializedRBSRemoved ) EXTERNAL_ERR( -1947, Corruption, JET_errRBSRedeleteFDPUnexpected ) EXTERNAL_ERR( -1948, Corruption, JET_errRBSRCPageFDPDeleteFileCorrupt ) + EXTERNAL_ERR( -1949, Corruption, JET_errRBSRedeleteFDPExpected ) EXTERNAL_WRN( 2000, State, JET_wrnDefragAlreadyRunning ) EXTERNAL_WRN( 2001, State, JET_wrnDefragNotRunning ) EXTERNAL_WRN( 2002, State, JET_wrnDatabaseScanAlreadyRunning ) diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index 1f1cb8fd..9b387e57 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -1695,6 +1695,12 @@ Language=English %1 (%2) %3Database %4: Page %5 in a B-Tree (ObjectId: %6) failed verfication due to page FDP delete flag mismatch at log position %7. The remote page FDP delete flag persisted to the log record was %8 but the actual page FDP delete flag was %9. This problem is likely due to revert of the database gone wrong. Please investigate the database revert operations performed on the database.%n . +MessageId=571 +SymbolicName=DB_PAGE_FDP_REDELETE_EXPECTED_ID +Language=English +%1 (%2) %3Database %4: Page %5 failed verification due to being reverted using revert snapshot without having page fdp delete flag set, but the log record at log position %6 expected the page to have the flag set (log position commit before revert - %7). This problem is likely due to revert of the database gone wrong. Please investigate the database revert operations performed on the database.%n +. + ;// !!! ARE YOU SURE you're adding this in the right place !!! ??? 
diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index bf81dec3..94cfab89 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -11543,6 +11543,10 @@ ERR LOG::ErrLGRIRedoExtentFreed( const LREXTENTFREED2 * const plrextentfreed ) if ( fTableRootPage ) { + // If this database was reverted and we redo'ing an extent freed LR on root page, we should be seeing fPageFDPDelete flag set on the page when it was reverted. + LGPOS lgposCommitBeforeRevert = pfmp->Pdbfilehdr()->le_lgposCommitBeforeRevert; + BOOL fPageFDPDeleteFlagExpected = BoolParam( m_pinst, JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo ) && !pfmp->FContainsDataFromFutureLogs() && ( CmpLgpos( lgposCommitBeforeRevert, m_lgposRedo ) > 0 ); + // Capture the preimage of the table root and pass flag to indicate this is a delete table so that we special mark this table when reverted. // We should generally not be touching the table pages before table delete. // But in case we did due to some bug or some unexpected scenario, we will pass fRBSPreimageRevertAlways to make sure we always keep the table deleted. 
@@ -11551,6 +11555,7 @@ ERR LOG::ErrLGRIRedoExtentFreed( const LREXTENTFREED2 * const plrextentfreed ) pgnoFirst, dbtimeLast, fRBSDeletedTableRootPage, + fPageFDPDeleteFlagExpected, BfpriBFMake( PctFMPCachePriority( ifmp ), (BFTEMPOSFILEQOS) qosIODispatchImmediate ), TcCurr() ); @@ -11561,6 +11566,37 @@ ERR LOG::ErrLGRIRedoExtentFreed( const LREXTENTFREED2 * const plrextentfreed ) BFMarkAsSuperCold( ifmp, pgnoFirst ); err = JET_errSuccess; } + + if ( err == JET_errRBSRedeleteFDPExpected ) + { + OSTraceSuspendGC(); + const WCHAR* rgwsz[] = + { + pfmp->WszDatabaseName(), + OSFormatW( L"%I32u (0x%08x)", pgnoFirst, pgnoFirst ), + OSFormatW( L"(%08I32X,%04hX,%04hX)", m_lgposRedo.lGeneration, m_lgposRedo.isec, m_lgposRedo.ib ), + OSFormatW( L"(%08I32X,%04hX,%04hX)", lgposCommitBeforeRevert.lGeneration, lgposCommitBeforeRevert.isec, lgposCommitBeforeRevert.ib ), + }; + + // Raise corruption event + UtilReportEvent( + eventError, + DATABASE_CORRUPTION_CATEGORY, + DB_PAGE_FDP_REDELETE_EXPECTED_ID, + _countof( rgwsz ), + rgwsz, + 0, + NULL, + pfmp->Pinst() ); + + OSUHAPublishEvent( + HaDbFailureTagCorruption, pfmp->Pinst(), HA_DATABASE_CORRUPTION_CATEGORY, + HaDbIoErrorNone, pfmp->WszDatabaseName(), 0, 0, + HA_DB_PAGE_FDP_REDELETE_EXPECTED_ID, _countof( rgwsz ), rgwsz ); + + OSTraceResumeGC(); + } + CallR( err ); } else if ( fEmptyPageFDPDeleted ) diff --git a/dev/ese/src/ese/logapi.cxx b/dev/ese/src/ese/logapi.cxx index 35a86ccd..51653b13 100644 --- a/dev/ese/src/ese/logapi.cxx +++ b/dev/ese/src/ese/logapi.cxx @@ -1004,7 +1004,7 @@ ERR ErrLGScanCheck( #endif // !DEBUG const BOOL fScanCheck2Supported = g_rgfmp[ifmp].FEfvSupported( JET_efvScanCheck2 ); - const BOOL fScanCheck2FlagsSupported = g_rgfmp[ ifmp ].FEfvSupported( JET_efvScanCheck2Flags ) && BoolParam( pinst, JET_paramFlight_EnableScanCheck2Flags ); + const BOOL fScanCheck2FlagsSupported = g_rgfmp[ ifmp ].FEfvSupported( JET_efvScanCheck2Flags ); const BOOL fScanEnableFDPDelete = g_rgfmp[ ifmp ].FEfvSupported( 
JET_efvRBSTooSoonDeletes ) && BoolParam( pinst, JET_paramFlight_EnableScanCheckFDPDeleteFlags ); DATA data; @@ -4869,7 +4869,7 @@ ERR ErrLGExtentFreed( LOG * const plog, const IFMP ifmp, const PGNO pgnoFirst, c // This is not logged for all free extent operations, only for those related to deleting a whole space tree. DATA rgdata[1]; - const BOOL fExtentFreed2Supported = g_rgfmp[ ifmp ].FEfvSupported( JET_efvExtentFreed2 ) && BoolParam( PinstFromIfmp( ifmp ), JET_paramFlight_EnableExtentFreed2 ); + const BOOL fExtentFreed2Supported = g_rgfmp[ ifmp ].FEfvSupported( JET_efvExtentFreed2 ); ERR err = JET_errSuccess; LREXTENTFREED* const plr = fExtentFreed2Supported ? ( new LREXTENTFREED2() ) : ( new LREXTENTFREED() ); diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index bd54ae52..ccb5a2b2 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -1873,7 +1873,7 @@ JETUNITTESTDB( RBSPreImageCompression, Xpress, dwOpenDatabase ) #endif // ENABLE_JET_UNIT_TEST -ERR ErrRBSRDWLatchAndCapturePreImage( _In_ const IFMP ifmp, _In_ const PGNO pgno, _In_ const DBTIME dbtimeLast, ULONG fPreImageFlags, _In_ const BFPriority bfpri, _In_ const TraceContext& tc ) +ERR ErrRBSRDWLatchAndCapturePreImage( _In_ const IFMP ifmp, _In_ const PGNO pgno, _In_ const DBTIME dbtimeLast, ULONG fPreImageFlags, _In_ BOOL fPageFDPDeleteFlagExpected, _In_ const BFPriority bfpri, _In_ const TraceContext& tc ) { if ( g_rgfmp[ifmp].Dbid() == dbidTemp || !g_rgfmp[ifmp].FRBSOn() ) @@ -1883,7 +1883,6 @@ ERR ErrRBSRDWLatchAndCapturePreImage( _In_ const IFMP ifmp, _In_ const PGNO pgno ERR err = JET_errSuccess; RBS_POS rbspos; - CPAGE cpageT; BFLatch bfl; // get exclusive latch. @@ -1899,6 +1898,14 @@ ERR ErrRBSRDWLatchAndCapturePreImage( _In_ const IFMP ifmp, _In_ const PGNO pgno return JET_errSuccess; } + // If it is expected for the page to have PageFDPDelete flag set but it isn't, return error. 
+ // Used during redo to validate any previously deleted and reverted table being redeleted has the flag set. + if ( fPageFDPDeleteFlagExpected && !( ( (CPAGE::PGHDR*)bfl.pv )->fFlags & CPAGE::fPageFDPDelete ) ) + { + BFRDWUnlatch( &bfl ); + return ErrERRCheck( JET_errRBSRedeleteFDPExpected ); + } + Call( g_rgfmp[ifmp].PRBS()->ErrCapturePreimage( g_rgfmp[ifmp].Dbid(), pgno, diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index 878ef248..f75206f4 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -8525,6 +8525,7 @@ ERR ErrSPCaptureNonRevertableFDPRootPage( PIB *ppib, FCB* pfcbFDPToFree, const P PgnoRoot( pfucb ), dbtimeNil, fRBSDeletedTableRootPage, + fFalse, pfucb->ppib->BfpriPriority( pfucb->ifmp ), *tcScope ) ); cpgCaptured++; @@ -8545,6 +8546,7 @@ ERR ErrSPCaptureNonRevertableFDPRootPage( PIB *ppib, FCB* pfcbFDPToFree, const P pfcbT->PgnoFDP(), dbtimeNil, fRBSDeletedTableRootPage, + fFalse, pfucb->ppib->BfpriPriority( pfucb->ifmp ), *tcScope ) ); cpgCaptured++; @@ -8563,6 +8565,7 @@ ERR ErrSPCaptureNonRevertableFDPRootPage( PIB *ppib, FCB* pfcbFDPToFree, const P pgnoLVRoot, dbtimeNil, fRBSDeletedTableRootPage, + fFalse, pfucb->ppib->BfpriPriority( pfucb->ifmp ), *tcScope ) ); cpgCaptured++; diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index f150841e..89ac3ace 100644 --- a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -129,8 +129,8 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramAlternateDatabaseRecoveryPath, CJetParam::typeFolder, 0, 0, 0, 1, 0, 246, L""), NORMAL_PARAM(JET_paramFlight_ExtentPageCountCacheVerifyOnly, CJetParam::typeBoolean, 1, 1, 0, 0, 0, -1, 0), NORMAL_PARAM(JET_paramFlight_EnablePgnoFDPLastSetTime, CJetParam::typeBoolean, 1, 0, 0, 1, 0, -1, fTrue), - NORMAL_PARAM(JET_paramFlight_EnableScanCheck2Flags, CJetParam::typeBoolean, 1, 0, 0, 1, 0, -1, fTrue), - NORMAL_PARAM(JET_paramFlight_EnableExtentFreed2, CJetParam::typeBoolean, 
1, 0, 0, 1, 0, -1, fTrue), + NORMAL_PARAM(JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo, CJetParam::typeBoolean, 1, 0, 0, 1, 0, -1, fTrue), + ILLEGAL_PARAM(117), NORMAL_PARAM(JET_paramFlight_RBSLargeRevertableDeletePages, CJetParam::typeInteger, 1, 0, 0, 0, 0, 2147483647, 0), NORMAL_PARAM(JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull, CJetParam::typeBoolean, 1, 0, 0, 0, 0, -1, fFalse), IGNORED_PARAM(JET_paramDBAPageAvailMin, CJetParam::typeInteger, 1, 1, 0, 0, 0, -1, 1280), @@ -362,8 +362,8 @@ static_assert( JET_paramIndexTuplesToIndexMax == 112, "The order of defintion fo static_assert( JET_paramAlternateDatabaseRecoveryPath == 113, "The order of defintion for JET_paramAlternateDatabaseRecoveryPath in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_ExtentPageCountCacheVerifyOnly == 114, "The order of defintion for JET_paramFlight_ExtentPageCountCacheVerifyOnly in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_EnablePgnoFDPLastSetTime == 115, "The order of defintion for JET_paramFlight_EnablePgnoFDPLastSetTime in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( JET_paramFlight_EnableScanCheck2Flags == 116, "The order of defintion for JET_paramFlight_EnableScanCheck2Flags in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( JET_paramFlight_EnableExtentFreed2 == 117, "The order of defintion for JET_paramFlight_EnableExtentFreed2 in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo == 116, "The order of defintion for JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); +static_assert( 117 == 117, "The order of defintion for 117 in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_RBSLargeRevertableDeletePages == 118, "The order of defintion for JET_paramFlight_RBSLargeRevertableDeletePages in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull == 119, "The order of defintion for JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramDBAPageAvailMin == 120, "The order of defintion for JET_paramDBAPageAvailMin in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); diff --git a/dev/ese/src/inc/revertsnapshot.h b/dev/ese/src/inc/revertsnapshot.h index 676afb2c..56ebcd67 100644 --- a/dev/ese/src/inc/revertsnapshot.h +++ b/dev/ese/src/inc/revertsnapshot.h @@ -1288,4 +1288,4 @@ class CRBSRevertContext : public CZeroInit VOID UtilLoadRBSinfomiscFromRBSfilehdr( JET_RBSINFOMISC* prbsinfomisc, const ULONG cbrbsinfomisc, const RBSFILEHDR* prbsfilehdr ); VOID RBSResourcesCleanUpFromInst( _In_ INST * const pinst ); -ERR ErrRBSRDWLatchAndCapturePreImage( _In_ const IFMP ifmp, _In_ const PGNO pgno, _In_ const DBTIME dbtimeLast, ULONG fPreImageFlags, _In_ const BFPriority bfpri, _In_ const TraceContext& tc ); \ No newline at end of file +ERR ErrRBSRDWLatchAndCapturePreImage( _In_ const IFMP ifmp, _In_ const PGNO pgno, _In_ const DBTIME dbtimeLast, ULONG fPreImageFlags, _In_ const BOOL fPageFDPDeleteFlagExpected, _In_ const BFPriority bfpri, _In_ const TraceContext& tc ); \ No newline at end of file diff --git a/dev/ese/src/noncore/interop/exceptions.h b/dev/ese/src/noncore/interop/exceptions.h index ebd75e5d..4f6537b4 100644 --- a/dev/ese/src/noncore/interop/exceptions.h +++ 
b/dev/ese/src/noncore/interop/exceptions.h @@ -8687,6 +8687,30 @@ namespace Isam }; + [Serializable] + public ref class IsamRBSRedeleteFDPExpectedException : public IsamCorruptionException + { + public: + IsamRBSRedeleteFDPExpectedException() : IsamCorruptionException( "Indicates that the reverted table is expected to be marked with delete flag.", JET_errRBSRedeleteFDPExpected) + { + } + + // Constructor with embedded exception. Does not use the string from esent.h. + IsamRBSRedeleteFDPExpectedException( String ^ description, Exception^ innerException ) : + IsamCorruptionException( description, innerException ) + { + } + + IsamRBSRedeleteFDPExpectedException( + System::Runtime::Serialization::SerializationInfo^ info, + System::Runtime::Serialization::StreamingContext context + ) + : IsamCorruptionException( info, context ) + { + } + + }; + [Serializable] public ref class IsamDatabaseAlreadyRunningMaintenanceException : public IsamUsageException { @@ -9835,6 +9859,8 @@ static IsamErrorException^ JetErrToException( const JET_ERR err ) return gcnew IsamRBSRedeleteFDPUnexpectedException; case JET_errRBSRCPageFDPDeleteFileCorrupt: return gcnew IsamRBSRCPageFDPDeleteFileCorruptException; + case JET_errRBSRedeleteFDPExpected: + return gcnew IsamRBSRedeleteFDPExpectedException; case JET_errDatabaseAlreadyRunningMaintenance: return gcnew IsamDatabaseAlreadyRunningMaintenanceException; case JET_errRootSpaceLeakEstimationAlreadyRunning: diff --git a/dev/ese/src/noncore/interop/params.h b/dev/ese/src/noncore/interop/params.h index 2ad87dfc..48e19520 100644 --- a/dev/ese/src/noncore/interop/params.h +++ b/dev/ese/src/noncore/interop/params.h @@ -182,8 +182,7 @@ MSINTERNAL enum class MJET_PARAM AlternateDatabaseRecoveryPath = 113, // recovery-only - search for dirty-shutdown databases in specified location only Flight_ExtentPageCountCacheVerifyOnly = 114, // Verify values read from the Extent Page Count Cache rather than just returning them. 
Flight_EnablePgnoFDPLastSetTime = 115, // whether we want to enable setting PgnoPFDSetTime in the system table for a table entry. - Flight_EnableScanCheck2Flags = 116, // whether we want to enable logging flags in ScanCheck2 log record. - Flight_EnableExtentFreed2 = 117, // whether we want to enable logging ExtentFreed2 LR after the efv upgrade. + Flight_EnableFDPDeleteFlagCheckOnExtentFreedRedo = 116, // whether we want to check if FDP delete flag is set when we redo extent freed LR. Flight_RBSLargeRevertableDeletePages = 118, // Large revertable delete size for a table (in pages) beyond which we will track the deletes. Flight_RBSRevertableDeleteIfTooSoonTimeNull = 119, // If set, we will do a revertable table delete even if NonRevertableTableDelete flag is passed provided NonRevertable delete is failing due to JET_errRBSDeleteTableTooSoon due to time not being set. Note: If JET_bitRevertableTableDeleteIfTooSoon is set, this variant is ignored. IndexTupleIncrement = 132, // for tuple indexes, offset increment for each succesive tuple From 604e3408599cd7f17acede797b6ccbb0d3892cef Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 15 Sep 2022 15:24:07 +0000 Subject: [PATCH 024/102] ESE Block Cache: fix lost write caused by incorrect update of presence filter during eviction When we load a slab for the first time, we enumerate the entries and load the presence filter appropriately. On any update, we analyze the update and make a relative change to the presence filter. Because of how this works, we can only consider the contents of each updated slot to make the decision to add or remove entries from the presence filter. It turned out that tracking any "clean" block or the latest version of a "dirty" block was practical. When a block is evicted, we will remove it from the PF if it was a "clean" block or the current "dirty" block. When a block is cached, we will add it to the PF if it was a "clean" block or the first "dirty" of a block. 
This may result in two PF entries for the same block if we cached the "clean" page on a read miss and then dirtied it (until that obsolete image is evicted). This was a compromise because there is no state to determine that the first "dirty" of the block overrides a "clean" block in our current scheme. The bug was that my code to find the "clean" block was slightly incorrect. It recognized any clean block including one that was previously dirty, had been written back, and was now being evicted. I was testing !FDirty when I should have been testing !FDirty && !FEverDirty. This change also includes: - some cleanup to ErrLoad to clarify what it is doing - more fixes to the presence filter related to the standby list - remove wasn't updating the standby list - retiring a standby item had bad flow control - moved the call to Update from ErrUpdateSlabs to ErrScheduleSlabForWriteBack to clarify/guarantee it is always called for any slab update and document that it is only done at do time not redo time Other changes: - synctool.bat updated to find Product Studio binary that was moved [Substrate:2488950c19bed3c44f9cdbc0cf8ec27319035f66] --- .../src/os/blockcache/_hashedlrukcache.hxx | 114 ++++++++++-------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 735f8f47..75f004db 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -3622,13 +3622,6 @@ class THashedLRUKCache Error( JET_errSuccess ); } - // if this slab contains changes that aren't accepted then ignore it - - if ( pcbs->FUpdated() ) - { - Error( JET_errSuccess ); - } - // visit all slots in the slab and load what is cached Call( pcbs->ErrVisitSlots( FLoadSlot_, (DWORD_PTR)this ) ); @@ -3796,12 +3789,13 @@ class THashedLRUKCache } BOOL FLoadSlot( _In_ const ERR errChunk, - _In_ const CCachedBlockSlotState& slotstAccepted, - _In_ const 
CCachedBlockSlotState& slotstCurrent ) + _In_ const CCachedBlockSlotState& slotstAccepted ) { - // ignore invalid slots + // NOTE: we only load accepted changes not pending changes - if ( !slotstCurrent.FValid() ) + // ignore invalid blocks + + if ( !slotstAccepted.FValid() ) { return fTrue; } @@ -3809,15 +3803,15 @@ class THashedLRUKCache OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s Presence Filter %s Load Current", OSFormatFileId( m_pc ), - OSFormat( slotstCurrent ) ) ); + OSFormat( slotstAccepted ) ) ); - // ignore any slot that is superceded + // ignore any block that is superceded - if ( slotstCurrent.FSuperceded() ) + if ( slotstAccepted.FSuperceded() ) { - // except if the slot is clean, to match the tracking in FUpdateSlot + // except if the block is unmodified, to match the tracking in FUpdateSlot - if ( !slotstCurrent.FDirty() ) + if ( !slotstAccepted.FDirty() && !slotstAccepted.FEverDirty() ) { } else @@ -3828,13 +3822,13 @@ class THashedLRUKCache // remember that the cached block in this slot is cached - Add( slotstCurrent.IbSlab(), slotstCurrent.Cbid() ); + Add( slotstAccepted.IbSlab(), slotstAccepted.Cbid() ); // if we just finished the entire slab then verify that the counts are correct - Assert( slotstCurrent.Chno() < (ChunkNumber)( m_cChunkPerSlab - 1 ) || - slotstCurrent.Slno() < (SlotNumber)( m_cSlotPerChunk - 1 ) || - FValidCounts( slotstCurrent.IbSlab() ) ); + Assert( slotstAccepted.Chno() < (ChunkNumber)( m_cChunkPerSlab - 1 ) || + slotstAccepted.Slno() < (SlotNumber)( m_cSlotPerChunk - 1 ) || + FValidCounts( slotstAccepted.IbSlab() ) ); return fTrue; } @@ -3848,7 +3842,7 @@ class THashedLRUKCache Unused( pfnVisitSlot ); CCachedBlockPresenceFilter* const pcbpf = (CCachedBlockPresenceFilter*)keyVisitSlot; - return pcbpf->FLoadSlot( errChunk, slotstAccepted, slotstCurrent ); + return pcbpf->FLoadSlot( errChunk, slotstAccepted ); } BOOL FUpdateSlot( _In_ const ERR errChunk, @@ -3880,10 +3874,10 @@ class THashedLRUKCache // track 
cached blocks removed from the cache // - // - any clean block + // - an unmodified block (!FDirty && !FEverDirty) // - the current version of a dirty block (!FSuperceded) // - // we track clean blocks due to a limitation with add below + // we track unmodified blocks due to a limitation with add below if ( slotstAccepted.FValid() && !slotstCurrent.FValid() ) { @@ -3891,7 +3885,7 @@ class THashedLRUKCache slotstAccepted.Cbid().Fileid() != fileidInvalid && slotstAccepted.Cbid().Fileserial() != fileserialInvalid ) { - if ( !slotstAccepted.FDirty() || !slotstAccepted.FSuperceded() ) + if ( !slotstAccepted.FDirty() && !slotstAccepted.FEverDirty() || !slotstAccepted.FSuperceded() ) { Remove( slotstAccepted.IbSlab(), slotstAccepted.Cbid() ); } @@ -3900,12 +3894,13 @@ class THashedLRUKCache // track cached blocks added to the cache // - // - any clean block + // - an unmodified block (!FDirty && !FEverDirty) // - the first update of a block (FFirstUpdate) // - // we track both clean blocks and blocks that are first updated because we don't have enough state - // to only track either the clean block or the first dirty if the clean block was not previously - // cached just by looking at this one slot. ideally we would detect this and track only one + // we track both unmodified blocks and blocks that are first updated because we don't have enough + // state to only track either the unmodified block or the first dirty if the unmodified block was + // not previously cached just by looking at this one slot. 
ideally we would detect this and track + // only one if ( !slotstAccepted.FValid() && slotstCurrent.FValid() ) { @@ -3913,7 +3908,7 @@ class THashedLRUKCache slotstCurrent.Cbid().Fileid() != fileidInvalid && slotstCurrent.Cbid().Fileserial() != fileserialInvalid ) { - if ( !slotstCurrent.FDirty() || slotstCurrent.FFirstUpdate() ) + if ( !slotstCurrent.FDirty() && !slotstCurrent.FEverDirty() || slotstCurrent.FFirstUpdate() ) { Add( slotstCurrent.IbSlab(), slotstCurrent.Cbid() ); } @@ -4159,13 +4154,20 @@ class THashedLRUKCache } } + // if we didn't succeed then try to remove the item from the standby list + + if ( !fSucceeded ) + { + fSucceeded = FRemoveStandbyItem( dwHash ); + } + // this must succeed EnforceSz( fSucceeded, "HashedLRUKCachePresenceFilterRemove2" ); // try to retire an item from the standby list - RetireStandbyItem(); + RetireAnyStandbyItem(); } BOOL FPossiblyContains( _In_ const DWORD dwHash ) @@ -4276,12 +4278,24 @@ class THashedLRUKCache return fSucceeded; } - void RetireStandbyItem() + BOOL FRemoveStandbyItem( _In_ const DWORD dwHash ) + { + BOOL fSucceeded = fFalse; + + for ( int iStandby = 0; !fSucceeded && iStandby < m_cStandby; iStandby++ ) + { + fSucceeded = (DWORD)AtomicCompareExchange( (LONG*)&m_rgdwStandby[ iStandby ], dwHash, 0 ) == dwHash; + } + + return fSucceeded; + } + + void RetireAnyStandbyItem() { DWORD dwHash = 0; BOOL fSucceeded = fFalse; - fSucceeded = FRemoveStandbyItem( &dwHash ); + fSucceeded = FRemoveAnyStandbyItem( &dwHash ); if ( fSucceeded ) { @@ -4289,17 +4303,17 @@ class THashedLRUKCache WORD wFingerprint = WFingerprint( dwHash ); fSucceeded = FAdd( qwIndex, wFingerprint ); - } - if ( !fSucceeded ) - { - fSucceeded = FAddStandbyItem( dwHash ); - } + if ( !fSucceeded ) + { + fSucceeded = FAddStandbyItem( dwHash ); + } - EnforceSz( fSucceeded, "HashedLRUKCachePresenceFilterRetireStandbyItem" ); + EnforceSz( fSucceeded, "HashedLRUKCachePresenceFilterRetireStandbyItem" ); + } } - BOOL FRemoveStandbyItem( _Out_ DWORD* 
const pdwHash ) + BOOL FRemoveAnyStandbyItem( _Out_ DWORD* const pdwHash ) { BOOL fSucceeded = fFalse; @@ -7056,16 +7070,6 @@ ERR THashedLRUKCache::ErrUpdateSlabs( _Inout_opt_ ICachedBlockSlab** const OSFormat( *pcbu ) ) ); } - // update our cached block presence filter if necessary - - for ( size_t ipcbs = 0; ipcbs < cpcbs; ipcbs++ ) - { - if ( rgpcbs[ ipcbs ] && rgpcbs[ ipcbs ]->FUpdated() ) - { - m_pcbpf->Update( rgpcbs[ ipcbs ] ); - } - } - // schedule slabs for write back for ( size_t ipcbs = 0; ipcbs < cpcbs; ipcbs++ ) @@ -7122,6 +7126,18 @@ ERR THashedLRUKCache::ErrScheduleSlabForWriteBack( _In_opt_ ICachedBlock Call( ErrGetOrAddSlabWriteBackContext( pcbs, &pswb ) ); + // update our cached block presence filter if necessary + + if ( pcbs->FUpdated() ) + { + // only update if we are not in recovery where we unilaterally overwrite slab state + + if ( m_fRecovered ) + { + m_pcbpf->Update( pcbs ); + } + } + // accept any updates that were made to the slab. if this isn't called then the state is rolled back. this step // is what prevents us from accidentally writing back changes to the slab that were not journaled. 
also note that // even if this slab were saved before the updates were accepted, the unaccepted updates would not be written out From cf64cace68c2834dec0dda478a83b19b9ca3c1c0 Mon Sep 17 00:00:00 2001 From: Vakishan Date: Thu, 15 Sep 2022 21:47:46 +0000 Subject: [PATCH 025/102] Fix RBS delete flag check failing due to the check using an int instead of a bool [Substrate:e7ff29f83a84fff2980e250238cff9de28672fb9] --- dev/ese/src/ese/_log/logredo.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 94cfab89..fe24eb28 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -9620,7 +9620,7 @@ ERR LOG::ErrLGRIRedoScanCheck( const LRSCANCHECK2 * const plrscancheck, BOOL* co Assert( cpage.CbPage() == UlParam( PinstFromIfmp( ifmp ), JET_paramDatabasePageSize ) ); const DBTIME dbtimePage = cpage.Dbtime(); const BOOL fInitDbtimePage = dbtimePage != 0 && dbtimePage != dbtimeShrunk; - const BOOL fPageFDPDelete = cpage.FPageFDPDelete(); + const BOOL fPageFDPDelete = !!( cpage.FPageFDPDelete() ); Expected( fInitDbtimePage || ( dbtimePage == dbtimeShrunk ) ); // dbtime 0 only usually comes from a completely uninit page (-1019). const DBTIME dbtimeCurrentInLogRec = plrscancheck->DbtimeCurrent(); From e2916786744a7b8177a773c8a08dfd3dc28fc734 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 16 Sep 2022 00:14:08 +0000 Subject: [PATCH 026/102] ESE Block Cache: avoid corruption if we ever access cached file offsets >= 16TB When inspecting the code, I noticed that we have a potential corruption issue related to integer truncation. The HashedLRUKCache caches files using 4kb blocks. It only reserves 32 bits to store the block number. It also reserves 2^32-1 as an invalid block number. If the accessed offset of a file / 2^12 >= 2^32-1 (i.e. about 16TB) then the cache truncates the block number and Bad Things will happen. 
This change addresses this by preventing caching of these offsets. It doesn't seem likely at this time that we will have dbs in prod that are this large so this is fine for now. [Substrate:f8b8ec23f64da9e64e78651ac496ccf2c7f36366] --- .../src/os/blockcache/_hashedlrukcache.hxx | 93 +++++++++++++------ 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 75f004db..8b0cc7b6 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -4793,7 +4793,8 @@ class THashedLRUKCache _In_ const BOOL fKnownNotCached, _Out_ QWORD* const pibSlab, _Out_ CCachedBlockId* const pcbid, - _Out_ BOOL* const pfPossiblyCached ); + _Out_ BOOL* const pfPossiblyCached, + _Out_ BOOL* const pfUncachable ); void FailIO( _In_ CRequest* const prequestIO, _In_ const ERR err ); ERR ErrEnsureInitSlabWriteBackHash() { return m_initOnceSlabWriteBackHash.Init( ErrInitSlabWriteBackHash_, this ); }; @@ -5741,10 +5742,18 @@ ERR THashedLRUKCache::ErrInvalidate( _In_ const VolumeId volumeid, QWORD ibSlab = 0; CCachedBlockId cbid; BOOL fPossiblyCached = fFalse; + BOOL fUncachable = fFalse; // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( pcfte, ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( pcfte, ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached, &fUncachable ) ); + + // if we couldn't have cached this block then skip it + + if ( fUncachable ) + { + continue; + } // if the cached block is not possibly cached then skip this offset @@ -8494,6 +8503,7 @@ void THashedLRUKCache::RequestRead( _In_ CRequest* const preq QWORD ibSlab = 0; CCachedBlockId cbid; BOOL fPossiblyCached = fFalse; + BOOL fUncachable = fFalse; CCachedBlockSlot slot; BOOL fCached = fFalse; @@ -8501,7 +8511,7 @@ void THashedLRUKCache::RequestRead( _In_ CRequest* const preq // determine if we are 
likely to have this cached block - Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached, &fUncachable ) ); // if the cached block is possibly cached then determine if it is cached @@ -8609,7 +8619,7 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const // determine if we should cache this request - const BOOL fCacheIfPossible = prequest->Cp() != cpDontCache && Pcconfig()->PctWrite() < 100; + const BOOL fCacheRequestIfPossible = prequest->Cp() != cpDontCache && Pcconfig()->PctWrite() < 100; // loop through the read by cached block potentially crossing many cached file blocks @@ -8617,17 +8627,23 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const ibCachedBlock <= prequest->Offsets().IbEnd(); ibCachedBlock += cbCachedBlock ) { - QWORD ibSlab = 0; + QWORD ibSlab = 0; CCachedBlockId cbid; - BOOL fPossiblyCached = fFalse; + BOOL fPossiblyCached = fFalse; + BOOL fUncachable = fFalse; + BOOL fCacheIfPossible = fFalse; CCachedBlockSlot slot; - BOOL fCached = fFalse; + BOOL fCached = fFalse; BYTE* const pbCachedBlock = (BYTE*)prequest->PbData() + ibCachedBlock - prequest->Offsets().IbStart(); // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, !prequest->FCacheHit(), &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, !prequest->FCacheHit(), &ibSlab, &cbid, &fPossiblyCached, &fUncachable ) ); + + // do not attempt to cache an uncachable block + + fCacheIfPossible = fCacheRequestIfPossible && !fUncachable; // if the cached block is possibly cached then determine if it is cached. 
otherwise, if we want to cache it // then ensure that we check to see if it is already cached @@ -8759,11 +8775,13 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq ibCachedBlock <= prequest->Offsets().IbEnd(); ibCachedBlock += cbCachedBlock ) { - QWORD ibSlab = 0; + QWORD ibSlab = 0; CCachedBlockId cbid; - BOOL fPossiblyCached = fFalse; + BOOL fPossiblyCached = fFalse; + BOOL fUncachable = fFalse; + BOOL fCacheIfPossible = fFalse; CCachedBlockSlot slot; - BOOL fCached = fFalse; + BOOL fCached = fFalse; const BYTE* const pbCachedBlock = prequest->PbData() + ibCachedBlock - prequest->Offsets().IbStart(); @@ -8772,14 +8790,18 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // NOTE: we do not cache writes to sparse regions of a file to force them to be reallocated. this is // required to maintain file meta-data parity with uncached files - const BOOL fCacheIfPossible = ( prequest->Cp() != cpDontCache && - Pcconfig()->PctWrite() > 0 && - !prequest->Pcfte()->FSparse( ibCachedBlock, cbCachedBlock ) ) || - prequest->Cp() == cpPinned; + const BOOL fCacheRequestIfPossible = ( prequest->Cp() != cpDontCache && + Pcconfig()->PctWrite() > 0 && + !prequest->Pcfte()->FSparse( ibCachedBlock, cbCachedBlock ) ) || + prequest->Cp() == cpPinned; // determine if we are likely to have this cached block - Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached ) ); + Call( ErrIsPossiblyCached( prequest->Pcfte(), ibCachedBlock, fFalse, &ibSlab, &cbid, &fPossiblyCached, &fUncachable ) ); + + // do not attempt to cache an uncachable block + + fCacheIfPossible = fCacheRequestIfPossible && !fUncachable; // if the cached block is possibly cached then determine if it is cached. 
otherwise, if we want to cache it // then ensure that we check to see if it is already cached @@ -9013,34 +9035,46 @@ ERR THashedLRUKCache::ErrIsPossiblyCached( _In_ CHashedLRUKCachedFileTab _In_ const BOOL fKnownNotCached, _Out_ QWORD* const pibSlab, _Out_ CCachedBlockId* const pcbid, - _Out_ BOOL* const pfPossiblyCached ) + _Out_ BOOL* const pfPossiblyCached, + _Out_ BOOL* const pfUncachable ) { - ERR err = JET_errSuccess; - QWORD ibSlab = 0; - BOOL fPossiblyCached = fFalse; + ERR err = JET_errSuccess; + CachedBlockNumber cbno = cbnoInvalid; + QWORD ibSlab = 0; + BOOL fPossiblyCached = fFalse; + BOOL fUncachable = fFalse; *pibSlab = 0; *pfPossiblyCached = fFalse; + *pfUncachable = fFalse; - // compute the cached block id for this offset + // compute the CachedBlockNumber and check for overflow or invalid values - new( pcbid ) CCachedBlockId( pcfte->Volumeid(), - pcfte->Fileid(), - pcfte->Fileserial(), - (CachedBlockNumber)( ibCachedBlock / cbCachedBlock ) ); + cbno = (CachedBlockNumber)( ibCachedBlock / cbCachedBlock ); + if ( ibCachedBlock != (QWORD)cbno * cbCachedBlock || cbno == cbnoInvalid ) + { + fUncachable = fTrue; + } + else + { + // compute the cached block id for this offset - // determine the slab that should hold this cached block + new( pcbid ) CCachedBlockId( pcfte->Volumeid(), pcfte->Fileid(), pcfte->Fileserial(), cbno ); - Call( m_pcbsmHash->ErrGetSlabForCachedBlock( *pcbid, &ibSlab ) ); + // determine the slab that should hold this cached block - // determine if it is possible that we have this cached block in the cache + Call( m_pcbsmHash->ErrGetSlabForCachedBlock( *pcbid, &ibSlab ) ); - fPossiblyCached = !fKnownNotCached && m_pcbpf->FPossiblyContains( ibSlab, *pcbid ); + // determine if it is possible that we have this cached block in the cache + + fPossiblyCached = !fKnownNotCached && m_pcbpf->FPossiblyContains( ibSlab, *pcbid ); + } // return the results *pibSlab = ibSlab; *pfPossiblyCached = fPossiblyCached; + *pfUncachable = fUncachable; 
HandleError: if ( err < JET_errSuccess ) @@ -9048,6 +9082,7 @@ HandleError: *pibSlab = 0; new( pcbid ) CCachedBlockId(); *pfPossiblyCached = fFalse; + *pfUncachable = fFalse; } return err; } From 92515e8d3cc39e7712967c47808fa2152ae93a35 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 16 Sep 2022 15:11:47 +0000 Subject: [PATCH 027/102] ESE Block Cache: don't emit telemetry for invalid path when attaching a file to the cache The storage may legitimately not be present. We already ignored this error. This avoids the noise for SSD Only Databases in production. [Substrate:a57badca49385ee3b4eed14a3378a3c58185b73b] --- dev/ese/src/os/blockcache/_filefilter.hxx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 72ea7ff7..ef599253 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -3156,6 +3156,13 @@ ERR TFileFilter::ErrAttach( _In_ const COffsets& offsetsFirstWrite ) err = ErrGetConfiguredCache(); if ( err < JET_errSuccess ) { + // silently ignore invalid path to cover the case when the storage doesn't exist, possibly intentionally + + if ( err == JET_errInvalidPath ) + { + Error( JET_errSuccess ); + } + Error( ErrCacheOpenFailure( "Open", err, JET_errSuccess ) ); } From d744184c79317beab8b94bb197f75fd94af7cbf2 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 20 Sep 2022 15:11:46 +0000 Subject: [PATCH 028/102] ESE: add optional type check to read shadowed header When the EBC master enable was switched off, we tried to run recovery against a cached EDB file. This resulted in a recovery failure but with the weird error JET_errPageSizeMismatch. We should have failed with JET_errFileInvalidType instead. This change adds an optional type check to ErrUtilReadShadowedHeader etc. The caller must indicate which type of file they are attempting to access. 
If the file may be of any type then JET_filetypeUnknown may be passed. We will also allow JET_filetypeTempDatabase if JET_filetypeDatabase was indicated (i.e. accept the more specialized type). This change also cleaned up the cut&paste code duplication in ErrUtilReadSpecificShadowedHeader. [Substrate:af3a3ef50f46ad12f310a0aeaad5e471fdc95d2f] --- dev/ese/src/ese/_log/log.cxx | 1 + dev/ese/src/ese/_log/logredo.cxx | 2 + dev/ese/src/ese/_log/logutil.cxx | 1 + dev/ese/src/ese/_log/rstmap.cxx | 1 + dev/ese/src/ese/_osu/fileu.cxx | 115 +++++++++++++---------------- dev/ese/src/ese/db.cxx | 3 + dev/ese/src/ese/dbdump.cxx | 3 + dev/ese/src/ese/dbutil.cxx | 1 + dev/ese/src/ese/fmp.cxx | 2 +- dev/ese/src/ese/io.cxx | 4 + dev/ese/src/ese/jetapi.cxx | 8 +- dev/ese/src/ese/rbsdump.cxx | 2 + dev/ese/src/ese/repair.cxx | 2 + dev/ese/src/ese/revertsnapshot.cxx | 11 +-- dev/ese/src/inc/_osu/fileu.hxx | 3 + 15 files changed, 87 insertions(+), 72 deletions(-) diff --git a/dev/ese/src/ese/_log/log.cxx b/dev/ese/src/ese/_log/log.cxx index 127ed39d..3f0f20ea 100644 --- a/dev/ese/src/ese/_log/log.cxx +++ b/dev/ese/src/ese/_log/log.cxx @@ -2567,6 +2567,7 @@ ERR LOG::ErrLGReadCheckpoint( _In_ PCWSTR wszCheckpointFile, CHECKPOINT *pcheckp m_pinst, m_pinst->m_pfsapi, wszCheckpointFile, + JET_filetypeCheckpoint, (BYTE*)pcheckpoint, sizeof(CHECKPOINT), -1, diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index fe24eb28..3db3061a 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -4988,6 +4988,7 @@ ERR LOG::ErrLGRICheckRedoCreateDb( m_pinst, m_pinst->m_pfsapi, wszDbName, + JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ) ); @@ -5236,6 +5237,7 @@ ERR LOG::ErrLGRICheckRedoAttachDb( m_pinst, m_pinst->m_pfsapi, wszDbName, + JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ) ); diff --git a/dev/ese/src/ese/_log/logutil.cxx b/dev/ese/src/ese/_log/logutil.cxx 
index 66a0d77f..839343e8 100644 --- a/dev/ese/src/ese/_log/logutil.cxx +++ b/dev/ese/src/ese/_log/logutil.cxx @@ -33,6 +33,7 @@ ERR ErrLGCheckDBFiles( pinst, pinst->m_pfsapi, wszDatabase, + JET_filetypeDatabase, (BYTE*)pdbfilehdrDb, g_cbPage, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), diff --git a/dev/ese/src/ese/_log/rstmap.cxx b/dev/ese/src/ese/_log/rstmap.cxx index ffa88c16..d6467552 100644 --- a/dev/ese/src/ese/_log/rstmap.cxx +++ b/dev/ese/src/ese/_log/rstmap.cxx @@ -547,6 +547,7 @@ ERR ErrRstmapSoftCheckDBFiles( INST *pinst, RSTMAP * pDbMapEntry ) pinst, pinst->m_pfsapi, wszDatabase, + JET_filetypeDatabase, (BYTE*)pdbfilehdrDb, g_cbPage, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), diff --git a/dev/ese/src/ese/_osu/fileu.cxx b/dev/ese/src/ese/_osu/fileu.cxx index 12c91c57..d90d4d06 100644 --- a/dev/ese/src/ese/_osu/fileu.cxx +++ b/dev/ese/src/ese/_osu/fileu.cxx @@ -196,79 +196,63 @@ ERR ErrUtilReadSpecificShadowedHeader( const INST* const pinst, DB_HEADER_READER // for ( cbPageCandidate = cbPageCandidateMin ; cbPageCandidate <= cbPageCandidateMax && cbAlloc > 0 ; cbPageCandidate *= 2 ) { - if ( FRangeContains( ibRead, cbRead, 0 * cbPageCandidate, cbPageCandidate ) || - ( cbAlloc >= cbPageCandidate && - pfapiRead->ErrIORead( *tcHeader, QWORD( 0 * cbPageCandidate ), cbPageCandidate, pbRead + 0 * cbPageCandidate, qos ) >= JET_errSuccess ) ) + for ( int i = 0; i < 2; i++ ) { - ChecksumPage( pbRead + 0 * cbPageCandidate, - cbPageCandidate, - databaseHeader, - 0, - &checksumExpected, - &checksumActual ); - if ( checksumActual == checksumExpected || - BoolParam( JET_paramDisableBlockVerification ) ) + if ( FRangeContains( ibRead, cbRead, i * cbPageCandidate, cbPageCandidate ) || + ( cbAlloc >= ( i + 1 ) * cbPageCandidate && + pfapiRead->ErrIORead( *tcHeader, QWORD( i * cbPageCandidate ), cbPageCandidate, pbRead + i * cbPageCandidate, qos ) >= JET_errSuccess ) ) { - if ( ibPageSize >= 0 ) + ChecksumPage( pbRead + i * cbPageCandidate, + cbPageCandidate, + databaseHeader, 
+ 0, + &checksumExpected, + &checksumActual ); + if ( checksumActual == checksumExpected || + BoolParam( JET_paramDisableBlockVerification ) ) { - if ( cbPageCandidate >= ibPageSize + sizeof( UnalignedLittleEndian ) ) + const ULONG filetype = *( ( UnalignedLittleEndian* )( pbRead + i * cbPageCandidate + offsetof( DBFILEHDR, le_filetype ) ) ); + if ( pdbHdrReader->filetype == JET_filetypeUnknown ) { - ULONG cbPageHeader = *( (UnalignedLittleEndian *)( pbRead + 0 * cbPageCandidate + ibPageSize ) ); - if ( FValidCbPage( cbPageHeader ) ) - { - if ( cbPageHeader == 0 ) - { - cbPageHeader = g_cbPageDefault; - } - if ( cbPageHeader == cbPageCandidate ) - { - cbPagePrimary = cbPageCandidate; - MergeRange( &ibRead, &cbRead, 0 * cbPageCandidate, cbPageCandidate ); - } - } } - } - else - { - cbPagePrimary = cbPageCandidate; - } - } - } - if ( FRangeContains( ibRead, cbRead, 1 * cbPageCandidate, cbPageCandidate ) || - ( cbAlloc >= 2 * cbPageCandidate && - pfapiRead->ErrIORead( *tcHeader, QWORD( 1 * cbPageCandidate ), cbPageCandidate, pbRead + 1 * cbPageCandidate, qos ) >= JET_errSuccess ) ) - { - ChecksumPage( pbRead + 1 * cbPageCandidate, - cbPageCandidate, - databaseHeader, - 0, - &checksumExpected, - &checksumActual ); - if ( checksumActual == checksumExpected || - BoolParam( JET_paramDisableBlockVerification ) ) - { - if ( ibPageSize >= 0 ) - { - if ( cbPageCandidate >= ibPageSize + sizeof( UnalignedLittleEndian ) ) + else if ( filetype == pdbHdrReader->filetype ) { - ULONG cbPageHeader = *( (UnalignedLittleEndian *)( pbRead + 1 * cbPageCandidate + ibPageSize ) ); - if ( FValidCbPage( cbPageHeader ) ) + } + else if ( filetype == JET_filetypeTempDatabase && pdbHdrReader->filetype == JET_filetypeDatabase ) + { + } + else if ( BoolParam( JET_paramDisableBlockVerification ) ) + { + } + else + { + Error( ErrERRCheck( JET_errFileInvalidType ) ); + } + + ULONG& cbPageOutput = i == 0 ? 
cbPagePrimary : cbPageSecondary; + if ( ibPageSize >= 0 ) + { + if ( cbPageCandidate >= ibPageSize + sizeof( UnalignedLittleEndian ) ) { - if ( cbPageHeader == 0 ) - { - cbPageHeader = g_cbPageDefault; - } - if ( cbPageHeader == cbPageCandidate ) + ULONG cbPageHeader = *( (UnalignedLittleEndian *)( pbRead + i * cbPageCandidate + ibPageSize ) ); + if ( FValidCbPage( cbPageHeader ) ) { - cbPageSecondary = cbPageCandidate; - MergeRange( &ibRead, &cbRead, 1 * cbPageCandidate, cbPageCandidate ); + if ( cbPageHeader == 0 ) + { + cbPageHeader = g_cbPageDefault; + } + if ( cbPageHeader == cbPageCandidate ) + { + cbPageOutput = cbPageCandidate; + MergeRange( &ibRead, &cbRead, i * cbPageCandidate, cbPageCandidate ); + } } } } - } - else - { - cbPageSecondary = cbPageCandidate; + else + { + cbPageOutput = cbPageCandidate; + } } } } @@ -488,6 +472,7 @@ LOCAL ERR ErrUtilIReadShadowedHeader( const INST* const pinst, IFileSystemAPI* const pfsapi, const WCHAR* const wszFileName, + _In_ const ULONG filetype, __out_bcount( cbHeader ) BYTE* pbHeader, const DWORD cbHeader, const LONG ibPageSize, @@ -509,6 +494,7 @@ LOCAL ERR ErrUtilIReadShadowedHeader( { headerRequestGoodOnly, // shadowedHeaderRequest wszFileName, // wszFileName + filetype, // filetype pbHeader, // pbHeader cbHeader, // cbHeader ibPageSize, // ibPageSize @@ -584,6 +570,7 @@ ERR ErrUtilReadShadowedHeader( const INST* const pinst, IFileSystemAPI* const pfsapi, const WCHAR* const wszFilePath, + _In_ const ULONG filetype, __out_bcount( cbHeader ) BYTE* pbHeader, const DWORD cbHeader, const LONG ibPageSize, @@ -601,7 +588,7 @@ ERR ErrUtilReadShadowedHeader( IFileAPI::fmfCached : IFileAPI::fmfNone ) ), &pfapi ) ); - Call( ErrUtilReadShadowedHeader( pinst, pfsapi, pfapi, pbHeader, cbHeader, ibPageSize, urhf, pcbHeaderActual, pShadowedHeaderStatus ) ); + Call( ErrUtilReadShadowedHeader( pinst, pfsapi, pfapi, filetype, pbHeader, cbHeader, ibPageSize, urhf, pcbHeaderActual, pShadowedHeaderStatus ) ); HandleError: delete pfapi; 
@@ -616,6 +603,7 @@ ERR ErrUtilReadShadowedHeader( const INST* const pinst, IFileSystemAPI* const pfsapi, IFileAPI* const pfapi, + _In_ const ULONG filetype, __out_bcount( cbHeader ) BYTE* pbHeader, const DWORD cbHeader, const LONG ibPageSize, @@ -641,6 +629,7 @@ ERR ErrUtilReadShadowedHeader( Call( ErrUtilIReadShadowedHeader( pinst, pfsapi, wszFilePath, + filetype, pbHeader, cbHeader, ibPageSize, diff --git a/dev/ese/src/ese/db.cxx b/dev/ese/src/ese/db.cxx index a582d102..4fbf1342 100644 --- a/dev/ese/src/ese/db.cxx +++ b/dev/ese/src/ese/db.cxx @@ -2679,6 +2679,7 @@ ERR ErrDBReadHeaderCheckConsistency( err = ErrUtilReadShadowedHeader( pfmp->Pinst(), pfsapi, pfapi, + JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ), @@ -3958,6 +3959,7 @@ LOCAL ERR ErrDBIUpdateHeaderFromTrailer( pinst, pfsapi, wszDatabase, + JET_filetypeDatabase, reinterpret_cast( pdbfilehdr ), g_cbPage, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), @@ -6185,6 +6187,7 @@ ERR ISAMAPI ErrIsamSetDatabaseSize( JET_SESID sesid, const WCHAR *wszDatabase, D Call( ErrUtilReadShadowedHeader( PinstFromPpib( ppib ), pfsapi, pfapi, + JET_filetypeDatabase, (BYTE *)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ) ) ); diff --git a/dev/ese/src/ese/dbdump.cxx b/dev/ese/src/ese/dbdump.cxx index 0bc9717d..a8efe40e 100644 --- a/dev/ese/src/ese/dbdump.cxx +++ b/dev/ese/src/ese/dbdump.cxx @@ -522,6 +522,7 @@ ERR ErrDUMPHeader( INST *pinst, _In_ PCWSTR wszDatabase, const BOOL fVerbose ) { headerRequestPrimaryOnly, wszDatabase, + JET_filetypeUnknown, NULL, cbHeader, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), @@ -537,6 +538,7 @@ ERR ErrDUMPHeader( INST *pinst, _In_ PCWSTR wszDatabase, const BOOL fVerbose ) { headerRequestSecondaryOnly, wszDatabase, + JET_filetypeUnknown, NULL, cbHeader, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), @@ -674,6 +676,7 @@ ERR ErrDUMPFixupHeader( INST *pinst, _In_ PCWSTR wszDatabase, const BOOL fVerbos { headerRequestGoodOnly, wszDatabase, + 
JET_filetypeUnknown, NULL, cbHeader, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), diff --git a/dev/ese/src/ese/dbutil.cxx b/dev/ese/src/ese/dbutil.cxx index c18cd73c..663aef4c 100644 --- a/dev/ese/src/ese/dbutil.cxx +++ b/dev/ese/src/ese/dbutil.cxx @@ -5808,6 +5808,7 @@ ERR ISAMAPI ErrIsamDBUtilities( JET_SESID sesid, JET_DBUTIL_W *pdbutil ) err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, pfapi, + JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ), diff --git a/dev/ese/src/ese/fmp.cxx b/dev/ese/src/ese/fmp.cxx index 9735b481..2e42ed77 100644 --- a/dev/ese/src/ese/fmp.cxx +++ b/dev/ese/src/ese/fmp.cxx @@ -1311,7 +1311,7 @@ ERR FMP::ErrNewAndWriteLatch( memset( pdbfilehdr, 0, g_cbPage ); // read in db header // note: read will fail from the newly created database path (when createdatabase() calls us) - const ERR errRH = ErrUtilReadShadowedHeader( pinst, pfsapi, wszDatabaseName, (BYTE*)pdbfilehdr, g_cbPage, -1, UtilReadHeaderFlags( urhfReadOnly | urhfNoEventLogging ) ); + const ERR errRH = ErrUtilReadShadowedHeader( pinst, pfsapi, wszDatabaseName, JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, -1, UtilReadHeaderFlags( urhfReadOnly | urhfNoEventLogging ) ); if ( errRH >= JET_errSuccess ) { AssertDatabaseHeaderConsistent( pdbfilehdr, g_cbPage, g_cbPage ); diff --git a/dev/ese/src/ese/io.cxx b/dev/ese/src/ese/io.cxx index a6629e4a..551de695 100644 --- a/dev/ese/src/ese/io.cxx +++ b/dev/ese/src/ese/io.cxx @@ -4765,6 +4765,7 @@ ERR CIrsOpContext::ErrCheckAttachedIrsContext( const INST * const pinst, PCWSTR err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, m_pfapiDb, + JET_filetypeDatabase, (BYTE*)pdbfilehdrCheck, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ) ); @@ -4991,6 +4992,7 @@ ERR ErrIRSAttachDatabaseForIrsV2( _Inout_ INST * const pinst, _In_ PCWSTR wszDat err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, pfapiDb, + JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ) ); 
@@ -5853,6 +5855,7 @@ ERR ErrIsamEndDatabaseIncrementalReseed( err = ErrUtilReadShadowedHeader( pinst, pfsapi, pfapiCheckpoint, + JET_filetypeCheckpoint, (BYTE*)pcheckpoint, sizeof( CHECKPOINT ), -1, @@ -6487,6 +6490,7 @@ ERR ErrIsamRemoveLogfile( pinstNil, pfsapi, wszDatabase, + JET_filetypeDatabase, reinterpret_cast( pdbfilehdr ), g_cbPage, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 742d450f..571c7a51 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -13032,6 +13032,7 @@ LOCAL JET_ERR JetGetDatabaseFileInfoEx( pinstNil, pfsapi, wszFullDbName, + JET_filetypeDatabase, (BYTE*)pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), @@ -13158,6 +13159,7 @@ LOCAL JET_ERR JetGetDatabaseFileInfoEx( Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, wszFullDbName, + InfoLevel == JET_DbInfoFileType ? JET_filetypeUnknown : JET_filetypeDatabase, (BYTE *)pdbfilehdr, sizeof( DBFILEHDR ), OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), @@ -23312,7 +23314,7 @@ JET_ERR ErrTestHookCorruptOfflineFile( const JET_TESTHOOKCORRUPT * const pcorrup IFileAPI::fmfNone ), &pfapi ) ); - Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapi, pbPageImage, g_cbPageMax, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), urhfNoFailOnPageMismatch ) ); + Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapi, JET_filetypeDatabase, pbPageImage, g_cbPageMax, OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), urhfNoFailOnPageMismatch ) ); cbPageSize = ((DBFILEHDR*)pbPageImage)->le_cbPageSize; Call( pfapi->ErrIORead( *TraceContextScope( iorpDirectAccessUtil ), @@ -23369,7 +23371,7 @@ JET_ERR ErrTESTHOOKAlterDatabaseFileHeader( const JET_TESTHOOKALTERDBFILEHDR * c Call( ErrOSFSCreate( g_pfsconfigGlobal, &pfsapi ) ); Call( pfsapi->ErrFileOpen( palterdbfilehdr->szDatabase, IFileAPI::fmfNone, &pfapiDatabase ) ); - Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapiDatabase, (BYTE*)pdbfilehdr, (DWORD)g_cbPageMax, 
(LONG)OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), urhfReadOnly|urhfNoFailOnPageMismatch, &cbPageSize, &shs ) ); + Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapiDatabase, JET_filetypeDatabase, (BYTE*)pdbfilehdr, (DWORD)g_cbPageMax, (LONG)OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), urhfReadOnly|urhfNoFailOnPageMismatch, &cbPageSize, &shs ) ); Call( CFlushMapForUnattachedDb::ErrGetPersistedFlushMapOrNullObjectIfRuntime( palterdbfilehdr->szDatabase, pdbfilehdr, pinstNil, &pfm ) ); @@ -24472,7 +24474,7 @@ LOCAL JET_ERR JetGetRBSFileInfoEx( Alloc( prbsfilehdr = (RBSFILEHDR * )PvOSMemoryPageAlloc( sizeof( RBSFILEHDR ), NULL ) ); - Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapi, (BYTE*) prbsfilehdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); + Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapi, JET_filetypeSnapshot, (BYTE*) prbsfilehdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); UtilLoadRBSinfomiscFromRBSfilehdr( ( JET_RBSINFOMISC* )pvResult, cbMax, ( RBSFILEHDR* )prbsfilehdr ); break; diff --git a/dev/ese/src/ese/rbsdump.cxx b/dev/ese/src/ese/rbsdump.cxx index c3dda626..ad70d094 100644 --- a/dev/ese/src/ese/rbsdump.cxx +++ b/dev/ese/src/ese/rbsdump.cxx @@ -406,6 +406,7 @@ ERR ErrDUMPRBSHeader( INST *pinst, _In_ PCWSTR wszRBS, const BOOL fVerbose ) { headerRequestPrimaryOnly, wszRBS, + JET_filetypeSnapshot, NULL, cbHeader, -1, @@ -421,6 +422,7 @@ ERR ErrDUMPRBSHeader( INST *pinst, _In_ PCWSTR wszRBS, const BOOL fVerbose ) { headerRequestSecondaryOnly, wszRBS, + JET_filetypeSnapshot, NULL, cbHeader, -1, diff --git a/dev/ese/src/ese/repair.cxx b/dev/ese/src/ese/repair.cxx index bfebb96f..b59eb0af 100644 --- a/dev/ese/src/ese/repair.cxx +++ b/dev/ese/src/ese/repair.cxx @@ -2141,6 +2141,7 @@ LOCAL ERR ErrREPAIRCheckHeader( err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, wszDatabase, + JET_filetypeDatabase, reinterpret_cast( pdbfilehdr ), g_cbPage, 
OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), @@ -8087,6 +8088,7 @@ LOCAL ERR ErrREPAIRChangeDBSignature( err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, wszDatabase, + JET_filetypeDatabase, reinterpret_cast( pdbfilehdr ), g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ), diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index ccb5a2b2..2ee4258d 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -846,7 +846,7 @@ LOCAL ERR ErrRBSLoadRbsGen( QwInstFileID( qwRBSFileID, pinst->m_iInstance, lRBSGen ), &pfapiRBS ) ); - err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, pfapiRBS, (BYTE*) prbshdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfNoEventLogging ); + err = ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, pfapiRBS, JET_filetypeSnapshot, (BYTE*) prbshdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfNoEventLogging ); if ( fDeleteCorruptUninitializedRBS && err == JET_errReadVerifyFailure ) { @@ -934,7 +934,7 @@ LOCAL ERR ErrRBSPerformLogChecks( Call( ErrRBSFilePathForGen_( wszRBSAbsRootDirPath, wszRBSBaseName, pinst->m_pfsapi, wszRBSAbsDirPath, sizeof( wszRBSAbsDirPath ), wszRBSAbsFilePath, cbOSFSAPI_MAX_PATHW, lRBSGen ) ); Call( CIOFilePerf::ErrFileOpen( pinst->m_pfsapi, pinst, wszRBSAbsFilePath, IFileAPI::fmfReadOnly, iofileRBS, qwRBSFileID, &pfapirbs ) ); - Call( ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, pfapirbs, (BYTE*) &rbsfilehdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); + Call( ErrUtilReadShadowedHeader( pinst, pinst->m_pfsapi, pfapirbs, JET_filetypeSnapshot, (BYTE*) &rbsfilehdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); Assert( pinst->m_plog ); Assert( rbsfilehdr.rbsfilehdr.le_lGenMaxLogCopied >= rbsfilehdr.rbsfilehdr.le_lGenMinLogCopied ); @@ -1276,7 +1276,7 @@ ERR CRevertSnapshot::ErrSetRBSFileApi( _In_ IFileAPI *pfapiRBS ) Alloc( 
m_prbsfilehdrCurrent = (RBSFILEHDR *)PvOSMemoryPageAlloc( sizeof(RBSFILEHDR), NULL ) ); // Load the header in the snapshot based on the set file api - Call( ErrUtilReadShadowedHeader( m_pinst, m_pinst->m_pfsapi, m_pfapiRBS, (BYTE*) m_prbsfilehdrCurrent, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); + Call( ErrUtilReadShadowedHeader( m_pinst, m_pinst->m_pfsapi, m_pfapiRBS, JET_filetypeSnapshot, (BYTE*) m_prbsfilehdrCurrent, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); // Set the file time create of current RBS gen on the cleaner. if ( m_pinst->m_prbscleaner != NULL ) @@ -3545,7 +3545,7 @@ ERR RBSCleanerIOOperator::ErrRBSFileHeader( PCWSTR wszRBSFilePath, _Out_ RBSFILE Assert( pfsapi ); Call( CIOFilePerf::ErrFileOpen( pfsapi, m_pinst, wszRBSFilePath, IFileAPI::fmfReadOnly, iofileRBS, qwRBSFileID, &pfapiRBS ) ); - Call( ErrUtilReadShadowedHeader( m_pinst, pfsapi, pfapiRBS, (BYTE*) prbsfilehdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); + Call( ErrUtilReadShadowedHeader( m_pinst, pfsapi, pfapiRBS, JET_filetypeSnapshot, (BYTE*) prbsfilehdr, sizeof( RBSFILEHDR ), -1, urhfNoAutoDetectPageSize | urhfReadOnly | urhfNoEventLogging ) ); HandleError: if ( pfapiRBS ) @@ -4132,6 +4132,7 @@ ERR CRBSDatabaseRevertContext::ErrRBSDBRCInit( RBSATTACHINFO* prbsattachinfo, SI m_pinst, m_pinst->m_pfsapi, m_pfapiDb, + JET_filetypeDatabase, (BYTE*)m_pdbfilehdr, g_cbPage, OffsetOf( DBFILEHDR, le_cbPageSize ) ); @@ -5428,7 +5429,7 @@ ERR CRBSRevertContext::ErrRevertCheckpointInit() qwRBSRevertChkFileID, &m_pfapirbsrchk ) ); - err = ErrUtilReadShadowedHeader( m_pinst, pfsapi, m_pfapirbsrchk, (BYTE*) m_prbsrchk, sizeof( RBSREVERTCHECKPOINT ), -1, urhfNoAutoDetectPageSize ); + err = ErrUtilReadShadowedHeader( m_pinst, pfsapi, m_pfapirbsrchk, JET_filetypeRBSRevertCheckpoint, (BYTE*) m_prbsrchk, sizeof( RBSREVERTCHECKPOINT ), -1, urhfNoAutoDetectPageSize 
); if ( err < JET_errSuccess ) { diff --git a/dev/ese/src/inc/_osu/fileu.hxx b/dev/ese/src/inc/_osu/fileu.hxx index add403cc..2aa6ed89 100644 --- a/dev/ese/src/inc/_osu/fileu.hxx +++ b/dev/ese/src/inc/_osu/fileu.hxx @@ -28,6 +28,7 @@ typedef struct tagDbHeaderReader { ShadowedHeaderRequest shadowedHeaderRequest; // in const WCHAR* wszFileName; // in + ULONG filetype; // in BYTE* pbHeader; // in DWORD cbHeader; // in LONG ibPageSize; // in @@ -58,6 +59,7 @@ ERR ErrUtilReadShadowedHeader( const INST* const pinst, IFileSystemAPI* const pfsapi, const WCHAR* const wszFilePath, + _In_ const ULONG filetype, __out_bcount( cbHeader ) BYTE* pbHeader, const DWORD cbHeader, const LONG ibPageSize, @@ -69,6 +71,7 @@ ERR ErrUtilReadShadowedHeader( const INST* const pinst, IFileSystemAPI* const pfsapi, IFileAPI* const pfapi, + _In_ const ULONG filetype, __out_bcount( cbHeader ) BYTE* pbHeader, const DWORD cbHeader, const LONG ibPageSize, From 7a1385f75b51d5a14845938a007234258b102d8d Mon Sep 17 00:00:00 2001 From: TAW Date: Tue, 20 Sep 2022 17:53:00 +0000 Subject: [PATCH 029/102] Slight normalization of ESENT vs ESE Tiny tweak so that the code is the same for esent and ese versions of eseutil, just a different data. Makes it simpler to remove strsafe.h. [Substrate:1c5be300a2241b54dcb1c15700cb09a318844641] --- dev/ese/src/eseutil/eseutil.cxx | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dev/ese/src/eseutil/eseutil.cxx b/dev/ese/src/eseutil/eseutil.cxx index 2e3ca9be..017d3a23 100644 --- a/dev/ese/src/eseutil/eseutil.cxx +++ b/dev/ese/src/eseutil/eseutil.cxx @@ -46,6 +46,18 @@ ESESHADOW_LOCAL_DEFERRED_DLL_STATE #endif +// Base product name and version numbers reported by eseutil. 
+#ifdef ESENT +#define ESEUTIL_PRODUCT_NAME L"Windows(R)" +#define ESEUTIL_PRODUCT_MAJOR VER_PRODUCTMAJORVERSION +#define ESEUTIL_PRODUCT_MINOR VER_PRODUCTMINORVERSION +#else +#define ESEUTIL_PRODUCT_NAME L"Exchange Server" +#define ESEUTIL_PRODUCT_MAJOR PRODUCT_MAJOR +#define ESEUTIL_PRODUCT_MINOR PRODUCT_MINOR +#endif + + // In fake recovery without undo, we set the grbit for JET_bitRecoveryWithoutUndo, but // then the our callback control decides to do undo after all, unless the "/u" option // was passed. @@ -147,11 +159,7 @@ LOCAL const WCHAR * const wszUsageErr21 = L"Usage Error: Config store spec n LOCAL const WCHAR * const wszUsageErr22 = L"Usage Error: Invalid log generation range specification."; -#ifdef ESENT -LOCAL const WCHAR * const wszHelpDesc1 = L"DESCRIPTION: Database utilities for the Extensible Storage Engine for Microsoft(R) Windows(R)."; -#else // !ESENT -LOCAL const WCHAR * const wszHelpDesc1 = L"DESCRIPTION: Database utilities for the Extensible Storage Engine for Microsoft(R) Exchange Server."; -#endif // ESENT +LOCAL const WCHAR * const wszHelpDesc1 = L"DESCRIPTION: Database utilities for the Extensible Storage Engine for Microsoft(R) " ESEUTIL_PRODUCT_NAME L"."; LOCAL const WCHAR * const wszHelpSyntax = L"MODES OF OPERATION:"; LOCAL const WCHAR * const wszHelpModes1 = L" Defragmentation: %s /d [options]"; LOCAL const WCHAR * const wszHelpModes2 = L" Recovery: %s /r [options]"; @@ -213,16 +221,8 @@ LOCAL WCHAR *GetCurArg(); LOCAL VOID EDBUTLPrintLogo( void ) { - WCHAR wszVersion[16]; - -#ifdef ESENT - StringCbPrintfW( wszVersion, sizeof(wszVersion), L"%d.%d", VER_PRODUCTMAJORVERSION, VER_PRODUCTMINORVERSION ); - wprintf( L"Extensible Storage Engine Utilities for Microsoft(R) Windows(R)%c", wchNewLine ); -#else // !ESENT - StringCbPrintfW( wszVersion, sizeof(wszVersion), L"%hs.%hs", PRODUCT_MAJOR, PRODUCT_MINOR ); - wprintf( L"Extensible Storage Engine Utilities for Microsoft(R) Exchange Server%c", wchNewLine ); -#endif // ESENT - wprintf( 
L"Version %s%c", wszVersion, wchNewLine ); + wprintf( L"Extensible Storage Engine Utilities for Microsoft(R) %s%c", ESEUTIL_PRODUCT_NAME, wchNewLine ); + wprintf( L"Version %hs.%hs%c", ESEUTIL_PRODUCT_MAJOR, ESEUTIL_PRODUCT_MINOR, wchNewLine ); wprintf( L"Copyright (c) Microsoft Corporation.\nLicensed under the MIT License.%c", wchNewLine ); wprintf( L"%c", wchNewLine ); } From a508f6dd7c609721d2c822d158c03e1bfb229819 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 22 Sep 2022 15:20:25 +0000 Subject: [PATCH 030/102] ESE: simplify hung io detection in BF to avoid accessing invalid IO contexts BF has code to detect hung IOs. It does this by capturing the pvIOContext provided by IFileAPI::PfnIOHandoff and later uses this to call IFileAPI::DtickIOElapsed. The problem with this code is that it can use the pvIOContext after it has been released by the IFileAPI implementation. Specifically, this happens because BF captures the pvIOContext in calls surrounding sync I/O calls such that there is a timing window where the pvIOContext has been released but it hasn't been removed from the associated BF. For COSFile, this is a minor annoyance because pvIOContext is an IOREQ and these are never released. Worst case, BF will check the wrong request. If it does, chances are excellent it won't be hung because obviously it was just reused. For the ESE Block Cache, this is a major annoyance. EBC's file system filter had to build an expensive scheme to validate these requests and return a safe answer if the CIOComplete had been released already. We had to pay this penalty even for files that were not attached to a cache. Even a probabilistic scheme like using a TPool with a min lifetime of 10s was not enough to avoid this hazard. This change addresses the root cause of this problem by eliminating the concept of a pvIOContext for sync I/O. A NULL will always be returned by IFileAPI::PfnIOHandoff for a sync I/O. 
BF has been changed to compensate for this by complicating the usage of BF::pvIOContext. For an async I/O it stores the pvIOContext from IFileAPI::PfnIOHandoff. For a sync I/O, it stores a TICK. These are distinguished by the least significant bit, which is set for a TICK and clear for a pvIOContext. BF uses the TICK to directly track the elapsed time of sync I/Os. EBC's expensive compensation scheme for this hazard was also removed and replaced with a pooled allocation for CIOComplete. [Substrate:bb29f616fd6a88422f6f2e74b690ab5b8d039b2f] --- dev/ese/published/inc/os/error.hxx | 6 +- dev/ese/published/inc/os/osfileapi.hxx | 2 + dev/ese/published/inc/os/time.hxx | 4 + dev/ese/src/ese/bf.cxx | 115 +++++++-------- dev/ese/src/inc/_bf.hxx | 28 ++-- dev/ese/src/os/_osfile.hxx | 2 +- dev/ese/src/os/blockcache/_blockcache.hxx | 1 - dev/ese/src/os/blockcache/_common.hxx | 17 +-- dev/ese/src/os/blockcache/_filefilter.hxx | 35 ++++- .../src/os/blockcache/_filefilterwrapper.hxx | 10 +- dev/ese/src/os/blockcache/_filewrapper.hxx | 127 +++++----------- dev/ese/src/os/blockcache/_iocompletehash.hxx | 135 ------------------ dev/ese/src/os/osblockcache.cxx | 1 + dev/ese/src/os/osfile.cxx | 8 +- dev/ese/src/os/time.cxx | 10 ++ 15 files changed, 163 insertions(+), 338 deletions(-) delete mode 100644 dev/ese/src/os/blockcache/_iocompletehash.hxx diff --git a/dev/ese/published/inc/os/error.hxx b/dev/ese/published/inc/os/error.hxx index cd234fef..08327908 100644 --- a/dev/ese/published/inc/os/error.hxx +++ b/dev/ese/published/inc/os/error.hxx @@ -22,13 +22,9 @@ const CHAR * SzSourceFileName( const CHAR * szFilePath ); // Prototypes // -#ifdef DEBUG - -// IsDebuggerAttached() is useful to silence Asserts. It shouldn't be used in -// retail code. 
+// indicates if a user mode debugger is attached to this process BOOL IsDebuggerAttached(); -#endif // ------------------------------------------------------------------------------------------------ diff --git a/dev/ese/published/inc/os/osfileapi.hxx b/dev/ese/published/inc/os/osfileapi.hxx index ff640105..e20080fe 100644 --- a/dev/ese/published/inc/os/osfileapi.hxx +++ b/dev/ese/published/inc/os/osfileapi.hxx @@ -377,6 +377,8 @@ class IFileAPI // fapi // to later surface a PfnIOComplete. If the ErrIORead / ErrIOWrite // functions are going to return a quota exceeded / errDiskTilt error, // this function will not be called. + // + // NOTE: pvIOContext is only provided for async I/O. typedef void (*PfnIOHandoff)( const ERR err, IFileAPI* const pfapi, diff --git a/dev/ese/published/inc/os/time.hxx b/dev/ese/published/inc/os/time.hxx index 029aa653..196be7e2 100644 --- a/dev/ese/published/inc/os/time.hxx +++ b/dev/ese/published/inc/os/time.hxx @@ -33,6 +33,10 @@ void OSTimeSetTimeInjection( const DWORD eTimeInjNegWrapMode, const TICK dtickTi TICK TickOSTimeCurrent(); +// returns the current timer tick count (1000 Hz) factoring out time the computer was hibernating or asleep + +TICK TickOSTimeInterruptCurrent(); + // performs an overflow aware comparison of two absolute tick counts INLINE LONG TickCmp( TICK tick1, TICK tick2 ) diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index 8560f44b..75d55799 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -24054,7 +24054,9 @@ ERR ErrBFIFlushLog( _In_ const IFMP ifmp, _In_ const IOFLUSHREASON iofr, const B // I/O -const void* const PV_IO_CTX_LOCK = (void*)upMax; +const void* const PV_IO_CTX_TICK_FLAG = (void*)(ULONG_PTR)1; +const void* const PV_IO_CTX_LOCK = (void*)( upMax & ~( (ULONG_PTR)PV_IO_CTX_TICK_FLAG ) ); + void* PvBFIAcquireIOContext( PBF pbf ) { void* pvIOContextOld = AtomicReadPointer( &pbf->pvIOContext ); @@ -24143,6 +24145,36 @@ void BFIResetIOContext( PBF pbf ) } } +void 
BFISetAsyncIOContext( _In_ const PBF pbf, _In_ void* const pvIOContextNew ) +{ + Assert( !( (ULONG_PTR)pvIOContextNew & (ULONG_PTR)PV_IO_CTX_TICK_FLAG ) ); + + BFISetIOContext( pbf, pvIOContextNew ); +} + +void BFIResetAsyncIOContext( _In_ const PBF pbf ) +{ + BFIResetIOContext( pbf ); +} + +TICK TickBFISyncIOContextStartTime() +{ + return (TICK)( (ULONG_PTR)TickOSTimeInterruptCurrent() | (ULONG_PTR)PV_IO_CTX_TICK_FLAG ); +} + +void BFISetSyncIOContext( _In_ const PBF pbf ) +{ + void* const pvIOContextNew = (void*)(ULONG_PTR)TickBFISyncIOContextStartTime(); + Assert( (ULONG_PTR)pvIOContextNew & (ULONG_PTR)PV_IO_CTX_TICK_FLAG ); + + BFISetIOContext( pbf, pvIOContextNew ); +} + +void BFIResetSyncIOContext( _In_ const PBF pbf ) +{ + BFIResetIOContext( pbf ); +} + BOOL FBFIIsIOHung( PBF pbf ) { void* const pvIOContext = PvBFIAcquireIOContext( pbf ); @@ -24162,12 +24194,14 @@ BYTE PctBFIIsIOHung( PBF pbf, void* const pvIOContext ) { IFileAPI* const pfapi = g_rgfmp[ pbf->ifmp ].Pfapi(); - const TICK dtickIOElapsed = pfapi->DtickIOElapsed( pvIOContext ); + const TICK dtickIOElapsed = ( (ULONG_PTR)pvIOContext & (ULONG_PTR)PV_IO_CTX_TICK_FLAG ) ? + DtickDelta( (TICK)(ULONG_PTR)pvIOContext, TickBFISyncIOContextStartTime() ) : + pfapi->DtickIOElapsed( pvIOContext ); const TICK dtickHungIO = (TICK)UlParam( JET_paramHungIOThreshold ); if ( dtickIOElapsed >= dtickHungIO ) { - return 100; + return IsDebuggerAttached() ? 99 : 100; } else { @@ -24317,14 +24351,17 @@ void BFISyncRead( PBF pbf, const OSFILEQOS qosIoPriorities, const TraceContext& if ( !FBFICacheViewCacheDerefIo( pbf ) ) { HRT hrtStart = HrtHRTCount(); + + BFISetSyncIOContext( pbf ); + err = pfapi->ErrIORead( tc, ibOffset, cbData, pbData, - qosIoUserDispatch | qosIOSignalSlowSyncIO, - NULL, // Passing a NULL pfnCompletion triggers sync I/O (foreground on this thread). 
- DWORD_PTR( pbf ), - IFileAPI::PfnIOHandoff( BFISyncReadHandoff ) ); + qosIoUserDispatch | qosIOSignalSlowSyncIO ); + + BFIResetSyncIOContext( pbf ); + BFITrackCacheMissLatency( pbf, hrtStart, ( tc.iorReason.Iorf() & iorfReclaimPageFromOS ) ? bftcmrReasonPagingFaultDb : bftcmrReasonSyncRead, qosIoPriorities, tc, err ); Ptls()->threadstats.cPageRead++; } @@ -24336,21 +24373,6 @@ void BFISyncRead( PBF pbf, const OSFILEQOS qosIoPriorities, const TraceContext& } -void BFISyncReadHandoff( const ERR err, - IFileAPI* const pfapi, - const FullTraceContext& tc, - const OSFILEQOS grbitQOS, - const QWORD ibOffset, - const DWORD cbData, - const BYTE* const pbData, - const PBF pbf, - void* const pvIOContext ) -{ - Assert( JET_errSuccess == err ); // Yeah!!! - - BFISetIOContext( pbf, pvIOContext ); -} - void BFISyncReadComplete( const ERR err, IFileAPI* const pfapi, const OSFILEQOS grbitQOS, @@ -24362,17 +24384,6 @@ void BFISyncReadComplete( const ERR err, { Assert( pbf->sxwl.FOwnWriteLatch() ); - // reset the I/O context, since the operation is officially completed. - - if ( AtomicReadPointer( &pbf->pvIOContext ) != NULL ) - { - BFIResetIOContext( pbf ); - } - else - { - Assert( FBFICacheViewCacheDerefIo( pbf ) ); - } - // read was successful if ( err >= 0 ) @@ -24547,10 +24558,9 @@ void BFIAsyncReadHandoff( const ERR err, void* const pvIOContext ) { Assert( JET_errSuccess == err ); // Yeah!!! 
- if ( pvIOContext != NULL ) { - BFISetIOContext( pbf, pvIOContext ); + BFISetAsyncIOContext( pbf, pvIOContext ); } else { @@ -24580,7 +24590,7 @@ void BFIAsyncReadComplete( const ERR err, if ( AtomicReadPointer( &pbf->pvIOContext ) != NULL ) { - BFIResetIOContext( pbf ); + BFIResetAsyncIOContext( pbf ); } else { @@ -24689,14 +24699,15 @@ ERR ErrBFISyncWrite( PBF pbf, const BFLatchType bfltHave, OSFILEQOS qos, const T // issue sync write + BFISetSyncIOContext( pbf ); + err = pfapi->ErrIOWrite( tc, ibOffset, cbData, pbData, - qos, - NULL, // Passing a NULL pfnCompletion triggers sync I/O (foreground on this thread). - DWORD_PTR( pbf ), - IFileAPI::PfnIOHandoff( BFISyncWriteHandoff ) ); + qos ); + + BFIResetSyncIOContext( pbf ); // complete sync write @@ -24707,21 +24718,6 @@ ERR ErrBFISyncWrite( PBF pbf, const BFLatchType bfltHave, OSFILEQOS qos, const T return err; } -void BFISyncWriteHandoff( const ERR err, - IFileAPI* const pfapi, - const FullTraceContext& tc, - const OSFILEQOS grbitQOS, - const QWORD ibOffset, - const DWORD cbData, - const BYTE* const pbData, - const PBF pbf, - void* const pvIOContext ) -{ - Assert( JET_errSuccess == err ); // Yeah!!! - - BFISetIOContext( pbf, pvIOContext ); -} - void BFISyncWriteComplete( const ERR err, IFileAPI* const pfapi, const FullTraceContext& tc, @@ -24732,10 +24728,6 @@ void BFISyncWriteComplete( const ERR err, const PBF pbf, const BFLatchType bfltHave ) { - // reset the I/O context, since the operation is officially completed. - - BFIResetIOContext( pbf ); - // trace that we have just written a page BFITraceWritePage( pbf, tc ); @@ -24932,8 +24924,7 @@ void BFIAsyncWriteHandoff( const ERR err, void* const pvIOContext ) { Assert( JET_errSuccess == err ); // Yeah!!! 
- - BFISetIOContext( pbf, pvIOContext ); + BFISetAsyncIOContext( pbf, pvIOContext ); Enforce( CmpLgpos( pbf->lgposModify, g_rgfmp[ pbf->ifmp ].LgposWaypoint() ) <= 0 ); // just for insurance @@ -25539,7 +25530,7 @@ void BFIAsyncWriteComplete( const ERR err, // reset the I/O context, since the operation is officially completed. - BFIResetIOContext( pbf ); + BFIResetAsyncIOContext( pbf ); // trace that we have just written a page diff --git a/dev/ese/src/inc/_bf.hxx b/dev/ese/src/inc/_bf.hxx index 22323fc4..cba7f6d1 100644 --- a/dev/ese/src/inc/_bf.hxx +++ b/dev/ese/src/inc/_bf.hxx @@ -277,7 +277,7 @@ struct BF // BF -- IFMP/PGNO buffer RCE* prceUndoInfoNext; // Undo Info chain - void* pvIOContext; // I/O context (in practice, an IOREQ) + void* pvIOContext; // I/O context (in practice, an IOREQ, async IO only) // 144 B ///////////////////////////////////////////////////////////////////////////////////// @@ -314,7 +314,7 @@ struct BF // BF -- IFMP/PGNO buffer CSXWLatch sxwl; // S/X/W Latch protecting this BF state and // its associated cached page - void* pvIOContext; // I/O context (in practice, an IOREQ) + void* pvIOContext; // I/O context (in practice, an IOREQ, async IO only) IFMP ifmp; // IFMP of this cached page PGNO pgno; // PGNO of this cached page @@ -1534,6 +1534,12 @@ void BFIReleaseIOContext( PBF pbf, void* const pvIOContext ); void BFISetIOContext( PBF pbf, void* const pvIOContextNew ); void BFIResetIOContext( PBF pbf ); +void BFISetAsyncIOContext( _In_ const PBF pbf, _In_ void* const pvIOContextNew ); +void BFIResetAsyncIOContext( _In_ const PBF pbf ); +TICK TickBFISyncIOContextStartTime(); +void BFISetSyncIOContext( _In_ const PBF pbf ); +void BFIResetSyncIOContext( _In_ const PBF pbf ); + BOOL FBFIIsIOHung( PBF pbf ); BYTE PctBFIIsIOHung( PBF pbf, void* const pvIOContext ); ERR ErrBFIFlushPendingStatus( PBF pbf ); @@ -1542,15 +1548,6 @@ void BFIPrepareReadPage( PBF pbf ); void BFIPrepareWritePage( PBF pbf ); void BFISyncRead( PBF pbf, const OSFILEQOS 
qosIoPriorities, const TraceContext& tc ); -void BFISyncReadHandoff( const ERR err, - IFileAPI *const pfapi, - const FullTraceContext& tc, - const OSFILEQOS grbitQOS, - const QWORD ibOffset, - const DWORD cbData, - const BYTE* const pbData, - const PBF pbf, - void* const pvIOContext ); void BFISyncReadComplete( const ERR err, IFileAPI *const pfapi, const OSFILEQOS grbitQOS, @@ -1592,15 +1589,6 @@ void BFIAsyncReadTempComplete( const ERR err, const IFMP ifmp); ERR ErrBFISyncWrite( PBF pbf, const BFLatchType bfltHave, OSFILEQOS qos, const TraceContext& tc ); -void BFISyncWriteHandoff( const ERR err, - IFileAPI *const pfapi, - const FullTraceContext& tc, - const OSFILEQOS grbitQOS, - const QWORD ibOffset, - const DWORD cbData, - const BYTE* const pbData, - const PBF pbf, - void* const pvIOContext ); void BFISyncWriteComplete( const ERR err, IFileAPI *const pfapi, const FullTraceContext& tc, diff --git a/dev/ese/src/os/_osfile.hxx b/dev/ese/src/os/_osfile.hxx index 25606a49..2e81a8da 100644 --- a/dev/ese/src/os/_osfile.hxx +++ b/dev/ese/src/os/_osfile.hxx @@ -412,7 +412,7 @@ class COSFile // osf CIOComplete* const piocomplete ); private: - static void IOSyncHandoff_( const ERR err, + static void IOSyncHandoff_( const ERR err, COSFile* const posf, const FullTraceContext& tc, const OSFILEQOS grbitQOS, diff --git a/dev/ese/src/os/blockcache/_blockcache.hxx b/dev/ese/src/os/blockcache/_blockcache.hxx index 7a664c4a..640658a1 100644 --- a/dev/ese/src/os/blockcache/_blockcache.hxx +++ b/dev/ese/src/os/blockcache/_blockcache.hxx @@ -23,7 +23,6 @@ #include "blockcache\_cachethreadlocalstoragebase.hxx" #include "blockcache\_cachethreadlocalstoragehash.hxx" #include "blockcache\_cachethreadlocalstorage.hxx" -#include "blockcache\_iocompletehash.hxx" #include "blockcache\_filewrapper.hxx" #include "blockcache\_cachedfileheader.hxx" #include "blockcache\_filefilter.hxx" diff --git a/dev/ese/src/os/blockcache/_common.hxx b/dev/ese/src/os/blockcache/_common.hxx index 
90f3faea..58cb364d 100644 --- a/dev/ese/src/os/blockcache/_common.hxx +++ b/dev/ese/src/os/blockcache/_common.hxx @@ -24,7 +24,6 @@ const INT rankCacheRepository = 0; const INT rankRegisterIFilePerfAPI = 0; const INT rankFileFilter = 0; const INT rankFileIdentification = 0; -const INT rankIOCompleteHash = 0; const INT rankJournalSegment = 0; const INT rankIORangeLock = 0; const INT rankCachedFileSparseMap = 0; @@ -81,17 +80,6 @@ class COffsets }; -// Buffer of the same size as another type. - -template< class T > -class Buffer -{ - private: - - BYTE m_rgb[sizeof( T )]; -}; - - // Error handling. INLINE void BlockCacheNotableEvent( _In_opt_ const WCHAR* const wszCachingFilePath, @@ -434,6 +422,11 @@ class TPool Free_( pv ); } + static void Cleanup() + { + s_state.Release( s_state.m_il ); + } + private: static void* PvAllocate_( _In_ const size_t cb ) diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index ef599253..5c6e8a3c 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -2091,6 +2091,9 @@ class TFileFilter // ff // IO completion context for an IFileFilter implementation. 
+#pragma push_macro( "new" ) +#undef new + class CIOComplete : public TFileWrapper::CIOComplete { @@ -2146,13 +2149,36 @@ class TFileFilter // ff } } + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void* operator new( _In_ const size_t cb, _In_ const void* const pv ) + { + return (void*)pv; + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); + } + BOOL FAccessingHeader() const { return m_psemCachedFileHeader != NULL; } void DoNotReleaseWriteBack() { m_fReleaseWriteback = fFalse; } - + + static void Cleanup() + { + CPool::Cleanup(); + } + static void Complete_( _In_ const ERR err, _In_ const VolumeId volumeid, _In_ const FileId fileid, @@ -2295,6 +2321,9 @@ class TFileFilter // ff volatile BOOL m_fReleaseResources; CIORequestPending m_iorequestpending; }; + +#pragma pop_macro( "new" ) + }; template< class I > @@ -3907,7 +3936,7 @@ ERR TFileFilter::ErrCacheMiss( _In_ const TraceContext& if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, iomCacheMiss, @@ -4177,7 +4206,7 @@ ERR TFileFilter::ErrWriteCommon( _In_ const IFileFilter::I if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? 
CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, iom, diff --git a/dev/ese/src/os/blockcache/_filefilterwrapper.hxx b/dev/ese/src/os/blockcache/_filefilterwrapper.hxx index 23ae9bf1..1470fb94 100644 --- a/dev/ese/src/os/blockcache/_filefilterwrapper.hxx +++ b/dev/ese/src/os/blockcache/_filefilterwrapper.hxx @@ -103,7 +103,7 @@ ERR TFileFilterWrapper::ErrIORead( _In_ const TraceConte if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, ibOffset, @@ -161,7 +161,7 @@ ERR TFileFilterWrapper::ErrIOWrite( _In_ const TraceConte if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, ibOffset, @@ -219,7 +219,7 @@ ERR TFileFilterWrapper::ErrRead( _In_ const TraceContext& if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, ibOffset, @@ -279,7 +279,7 @@ ERR TFileFilterWrapper::ErrWrite( _In_ const TraceConte if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? 
CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, ibOffset, @@ -335,6 +335,8 @@ class CFileFilterWrapper : public TFileFilterWrapper : TFileFilterWrapper( pff, iom ) { } + + static void Cleanup() { CIOComplete::Cleanup(); } }; diff --git a/dev/ese/src/os/blockcache/_filewrapper.hxx b/dev/ese/src/os/blockcache/_filewrapper.hxx index 7c1d32cd..f90671be 100644 --- a/dev/ese/src/os/blockcache/_filewrapper.hxx +++ b/dev/ese/src/os/blockcache/_filewrapper.hxx @@ -119,6 +119,9 @@ class TFileWrapper // fw // IO completion context for an IFileAPI implementation. +#pragma push_macro( "new" ) +#undef new + class CIOComplete { public: @@ -146,19 +149,35 @@ class TFileWrapper // fw m_fIOCompleteCalled( fFalse ), m_cref( 1 ) { - (void)ErrRegister( this ); + } + + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void* operator new( _In_ const size_t cb, _In_ const void* const pv ) + { + return (void*)pv; + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); } static void Cleanup() { - s_iocompleteHash.Term(); + CPool::Cleanup(); } - protected: + protected: virtual ~CIOComplete() { - Unregister( this ); } virtual void CleanupBeforeAsyncIOCompletion() @@ -169,47 +188,18 @@ class TFileWrapper // fw TICK DtickIOElapsed() { - TICK dtick = 0; - - // determine if this is still a valid io context. this covers for a design flaw in pfnIOHandoff - // where the interface presumes that the value assigned to pvIOContext will exist forever. 
note - // that this same flaw can cause us to accidentally look at a pvIOContext that has already been - // reused + TICK dtick = 0; - CIOCompleteKey key( this ); - CIOCompleteHash::CLock lock; - CIOCompleteEntry entry; + // if the pvIOContext is null then we are the source of the information, otherwise call down + // to the inner IFileAPI implementation - s_iocompleteHash.ReadLockKey( key, &lock ); - - const BOOL fValid = ErrToErr( s_iocompleteHash.ErrRetrieveEntry( &lock, &entry ) ) == JET_errSuccess; - CMeteredSection::Group group = CMeteredSection::groupInvalidNil; - - if ( fValid ) - { - group = m_ms.Enter(); - } - - s_iocompleteHash.ReadUnlockKey( &lock ); - - if ( fValid ) + if ( !m_pvIOContext ) { - // if the pvIOContext is null then we are the source of the information, otherwise call down - // to the inner IFileAPI implementation - - if ( !m_pvIOContext ) - { - dtick = (DWORD)min( lMax, CmsecHRTFromHrtStart( m_hrtStart ) ); - } - else - { - dtick = m_pfapiInner->DtickIOElapsed( m_pvIOContext ); - } + dtick = (DWORD)min( lMax, CmsecHRTFromHrtStart( m_hrtStart ) ); } - - if ( group != CMeteredSection::groupInvalidNil ) + else { - m_ms.Leave( group ); + dtick = m_pfapiInner->DtickIOElapsed( m_pvIOContext ); } return dtick; @@ -396,51 +386,6 @@ class TFileWrapper // fw private: - static ERR ErrEnsureInitIOCompleteHash() { return s_initOnceIocompleteHash.Init( ErrInitIOCompleteHash_, NULL ); }; - static ERR ErrInitIOCompleteHash_( _In_ void* unused ) { return ErrToErr( s_iocompleteHash.ErrInit( 5.0, 1.0 ) ); } - - static ERR ErrRegister( _In_ CIOComplete* const piocomplete ) - { - ERR err = JET_errSuccess; - CIOCompleteKey key( piocomplete ); - CIOCompleteEntry entry( piocomplete ); - CIOCompleteHash::CLock lock; - BOOL fLocked = fFalse; - - Call( ErrEnsureInitIOCompleteHash() ); - - s_iocompleteHash.WriteLockKey( key, &lock ); - fLocked = fTrue; - Call( ErrToErr( s_iocompleteHash.ErrInsertEntry( &lock, entry ) ) ); - - HandleError: - if ( fLocked ) - { - 
s_iocompleteHash.WriteUnlockKey( &lock ); - } - return err; - } - - static void Unregister( _In_ CIOComplete* const piocomplete ) - { - CIOCompleteHash::CLock lock; - CIOCompleteEntry entry; - - s_iocompleteHash.WriteLockKey( CIOCompleteKey( piocomplete ), &lock ); - if ( ErrToErr( s_iocompleteHash.ErrRetrieveEntry( &lock, &entry ) ) == JET_errSuccess ) - { - CallS( ErrToErr( s_iocompleteHash.ErrDeleteEntry( &lock ) ) ); - } - s_iocompleteHash.WriteUnlockKey( &lock ); - - piocomplete->m_ms.Partition(); - } - - private: - - static CInitOnce< ERR, decltype( &ErrInitIOCompleteHash_ ), void* > s_initOnceIocompleteHash; - static CIOCompleteHash s_iocompleteHash; - typename CInvasiveList::CElement m_ile; const BOOL m_fIsHeapAlloc; IFileAPI* const m_pfapi; @@ -459,6 +404,8 @@ class TFileWrapper // fw CMeteredSection m_ms; }; +#pragma pop_macro( "new" ) + protected: ERR HandleReservedIOREQ( _In_ const TraceContext& tc, @@ -479,12 +426,6 @@ class TFileWrapper // fw const BOOL m_fReleaseOnClose; }; -template< class I > -CInitOnce< ERR, decltype( &TFileWrapper::CIOComplete::ErrInitIOCompleteHash_ ), void* > TFileWrapper::CIOComplete::s_initOnceIocompleteHash; - -template< class I > -CIOCompleteHash TFileWrapper::CIOComplete::s_iocompleteHash( rankIOCompleteHash ); - template< class I > TFileWrapper::TFileWrapper( _In_ I* const pi ) : m_piInner( pi ), @@ -634,7 +575,7 @@ ERR TFileWrapper::ErrIORead( const TraceContext& tc, if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, ibOffset, @@ -692,7 +633,7 @@ ERR TFileWrapper::ErrIOWrite( const TraceContext& tc, if ( pfnIOComplete || pfnIOHandoff ) { const BOOL fHeap = pfnIOComplete != NULL; - Alloc( piocomplete = new( fHeap ? 
new Buffer() : _malloca( sizeof( CIOComplete ) ) ) + Alloc( piocomplete = new( fHeap ? CIOComplete::CPool::PvAllocate() : _malloca( sizeof( CIOComplete ) ) ) CIOComplete( fHeap, this, ibOffset, diff --git a/dev/ese/src/os/blockcache/_iocompletehash.hxx b/dev/ese/src/os/blockcache/_iocompletehash.hxx deleted file mode 100644 index fe4accc6..00000000 --- a/dev/ese/src/os/blockcache/_iocompletehash.hxx +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -#pragma once - -// CIOComplete table key. - -class CIOCompleteKey -{ - public: - - CIOCompleteKey() - : m_piocomplete( NULL ) - { - } - - CIOCompleteKey( _In_ const void* const piocomplete ) - : m_piocomplete( piocomplete ) - { - } - - CIOCompleteKey( _In_ const CIOCompleteKey& src ) - { - *this = src; - } - - const CIOCompleteKey& operator=( _In_ const CIOCompleteKey& src ) - { - m_piocomplete = src.m_piocomplete; - - return *this; - } - - const void* Piocomplete() const { return m_piocomplete; } - UINT UiHash() const { return CIOCompleteKey::UiHash( m_piocomplete ); } - - static UINT UiHash( _In_ const void* const piocomplete ) { return (UINT)( (UINT_PTR)piocomplete / sizeof( UINT_PTR ) ); } - - private: - - const void* m_piocomplete; -}; - -// Cached block slab table entry. - -class CIOCompleteEntry -{ - public: - - CIOCompleteEntry() - : m_piocomplete( NULL ) - { - } - - CIOCompleteEntry( _In_ const void* const piocomplete ) - : m_piocomplete( piocomplete ) - { - } - - CIOCompleteEntry( _In_ const CIOCompleteEntry& src ) - { - *this = src; - } - - const CIOCompleteEntry& operator=( _In_ const CIOCompleteEntry& src ) - { - m_piocomplete = src.m_piocomplete; - - return *this; - } - - const void* Piocomplete() const { return m_piocomplete; } - UINT UiHash() const { return CIOCompleteKey::UiHash( m_piocomplete ); } - - private: - - const void* m_piocomplete; -}; - -// Cached file hash table. 
- -typedef CDynamicHashTable CIOCompleteHash; - -INLINE CIOCompleteHash::NativeCounter CIOCompleteHash::CKeyEntry::Hash( _In_ const CIOCompleteKey& key ) -{ - return CIOCompleteHash::NativeCounter( key.UiHash() ); -} - -INLINE CIOCompleteHash::NativeCounter CIOCompleteHash::CKeyEntry::Hash() const -{ - return CIOCompleteHash::NativeCounter( m_entry.UiHash() ); -} - -INLINE BOOL CIOCompleteHash::CKeyEntry::FEntryMatchesKey( _In_ const CIOCompleteKey& key ) const -{ - if ( m_entry.Piocomplete() != key.Piocomplete() ) - { - return fFalse; - } - - return fTrue; -} - -INLINE void CIOCompleteHash::CKeyEntry::SetEntry( _In_ const CIOCompleteEntry& entry ) -{ - m_entry = entry; -} - -INLINE void CIOCompleteHash::CKeyEntry::GetEntry( _In_ CIOCompleteEntry* const pentry ) const -{ - *pentry = m_entry; -} - -template<> -INLINE ERR ErrToErr( _In_ const typename CIOCompleteHash::ERR err ) -{ - switch ( err ) - { - case CIOCompleteHash::ERR::errSuccess: - return JET_errSuccess; - case CIOCompleteHash::ERR::errOutOfMemory: - return ErrERRCheck( JET_errOutOfMemory ); - case CIOCompleteHash::ERR::errInvalidParameter: - return ErrERRCheck( JET_errInvalidParameter ); - case CIOCompleteHash::ERR::errEntryNotFound: - return ErrERRCheck( JET_errRecordNotFound ); - case CIOCompleteHash::ERR::errNoCurrentEntry: - return ErrERRCheck( JET_errNoCurrentRecord ); - case CIOCompleteHash::ERR::errKeyDuplicate: - return ErrERRCheck( JET_errKeyDuplicate ); - default: - Assert( fFalse ); - return ErrERRCheck( JET_errInternalError ); - } -} diff --git a/dev/ese/src/os/osblockcache.cxx b/dev/ese/src/os/osblockcache.cxx index 52ac4ce3..ae545b48 100644 --- a/dev/ese/src/os/osblockcache.cxx +++ b/dev/ese/src/os/osblockcache.cxx @@ -338,5 +338,6 @@ BOOL FOSBlockCachePreinit() void OSBlockCachePostterm() { CFileWrapper::Cleanup(); + CFileFilterWrapper::Cleanup(); CFileFilter::Cleanup(); } \ No newline at end of file diff --git a/dev/ese/src/os/osfile.cxx b/dev/ese/src/os/osfile.cxx index 
5edbc9f5..a9a4de36 100644 --- a/dev/ese/src/os/osfile.cxx +++ b/dev/ese/src/os/osfile.cxx @@ -2125,6 +2125,10 @@ void COSFile::IOSyncHandoff_( const ERR err, void* const pioreq ) { Assert( ( piocomplete != NULL ) && ( piocomplete->m_pfnIOHandoff != NULL ) ); + + // NOTE: we intentionally do not provide the pvIOContext (i.e. pioreq) for a sync I/O. there is no safe way to + // use that context because there is no notification of when it becomes invalid / no longer used by this I/O. + piocomplete->m_pfnIOHandoff( err, posf, tc, @@ -2133,7 +2137,7 @@ void COSFile::IOSyncHandoff_( const ERR err, cbData, pbData, piocomplete->m_keyIOComplete, - pioreq ); + NULL ); } void COSFile::IOSyncComplete( const ERR err, @@ -3186,4 +3190,4 @@ TICK COSFile::DtickIOElapsed( void* const pvIOContext ) const QWORD cmsecIOElapsed = CmsecLatencyOfOSOperation( pioreq ); return (TICK)min( cmsecIOElapsed, dwMax ); -} +} \ No newline at end of file diff --git a/dev/ese/src/os/time.cxx b/dev/ese/src/os/time.cxx index ae227443..d7167e7e 100644 --- a/dev/ese/src/os/time.cxx +++ b/dev/ese/src/os/time.cxx @@ -165,6 +165,16 @@ TICK TickOSTimeCurrent() return tickCurrent; } +// returns the current timer tick count (1000 Hz) factoring out time the computer was hibernating or asleep + +TICK TickOSTimeInterruptCurrent() +{ + QWORD qwUnbiasedTime = 0; + QueryUnbiasedInterruptTime( &qwUnbiasedTime ); + + return TICK( qwUnbiasedTime / 10000 ); +} + // High Resolution Timer From b06d945e70981dc6d0ef0c89caf3c0b6d372748a Mon Sep 17 00:00:00 2001 From: TAW Date: Fri, 23 Sep 2022 07:08:51 +0000 Subject: [PATCH 031/102] Some re-org of our types Incremental Linux port work. 1) Start the reorganization of cc.hxx to define the final typenames we're going to use first, then typedef some versions we're currently using. 2) Check in initial version of gcr2.cmd, a batch file that will (eventually) automatically rename our plethora of types to just the final typenames.
[Substrate:1035d29b1e01605741f1820a81309c2fd3420d66] --- dev/ese/published/inc/cc.hxx | 465 ++++++++++++++-------------- dev/ese/published/inc/os/string.hxx | 12 - dev/ese/published/inc/os/types.hxx | 14 - 3 files changed, 238 insertions(+), 253 deletions(-) diff --git a/dev/ese/published/inc/cc.hxx b/dev/ese/published/inc/cc.hxx index 701d3a18..dcbc82af 100644 --- a/dev/ese/published/inc/cc.hxx +++ b/dev/ese/published/inc/cc.hxx @@ -11,293 +11,299 @@ // Some interesting defines we might try ... #ifdef _MSC_VER -#ifndef WINNT -//#define WINNT 1 -#endif + + #ifndef WINNT + //#define WINNT 1 + #endif + #else -//#define UNIX 1 -//#define _GCC 1 + + //#define UNIX 1 + //#define _GCC 1 + #endif -// -// SAL is not defined everywhere -// -#ifndef _MSC_VER - -#define _In_ -#define _Out_ -#define _Out_opt_ -#define _Inout_ -#define _In_count_(x) -#define _In_reads_(x) -#define _In_reads_opt_(x) -#define _In_reads_bytes_(x) -#define _In_reads_bytes_opt_(x) -#define _Inout_updates_bytes_(x) -#define _Inout_updates_opt_(x) -#define _Out_writes_(x) -#define _Out_writes_to_opt_(x, y) -#define _Out_writes_bytes_(x) -#define _Out_writes_bytes_opt_(x) -#define _Out_writes_bytes_to_(x, y) -#define _Out_writes_bytes_to_opt_(x, y) -#define _Outptr_result_buffer_(x) -#define _Null_terminated_ -#define _Return_type_success_(x) -#define _Field_size_(x) -#define _Field_size_opt_(x) -#define _Field_size_bytes_(x) -#define _Field_size_bytes_opt_(x) - -#else // _MSC_VER - -#include - -// These conflict with definitions in headers such as on non-Windows platforms. -// _In_ and _Out_ should be used instead anyway, according to Microsoft's SAL documentation. 
-// -// Unfortunately, can't easily redefine __in and __out because these old-style annotations -// crept back into the Windows headers ntsecapi.h and dbgeng.h, which we include // -// #undef __in -// #undef __out +// Source Annotation Language (SAL) // -// #define __in Use_In_instead_of__in -// #define __out Use_Out_instead_of__out - -#endif // !_MSC_VER -// Like SAL this produces a compile-time assert ... -#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1] +#ifdef _MSC_VER + #include + + // These conflict with definitions in headers such as on non-Windows platforms. + // _In_ and _Out_ should be used instead anyway, according to Microsoft's SAL documentation. + // + // Unfortunately, can't easily redefine __in and __out because these old-style annotations + // crept back into the Windows headers ntsecapi.h and dbgeng.h, which we include + // + // #undef __in + // #undef __out + // + // #define __in Use_In_instead_of__in + // #define __out Use_Out_instead_of__out + +#else // !_MSC_VER + + // SAL is not defined everywhere + // + + #define _In_ + #define _Out_ + #define _Out_opt_ + #define _Inout_ + #define _In_count_(x) + #define _In_reads_(x) + #define _In_reads_opt_(x) + #define _In_reads_bytes_(x) + #define _In_reads_bytes_opt_(x) + #define _Inout_updates_bytes_(x) + #define _Inout_updates_opt_(x) + #define _Out_writes_(x) + #define _Out_writes_to_opt_(x, y) + #define _Out_writes_bytes_(x) + #define _Out_writes_bytes_opt_(x) + #define _Out_writes_bytes_to_(x, y) + #define _Out_writes_bytes_to_opt_(x, y) + #define _Outptr_result_buffer_(x) + #define _Null_terminated_ + #define _Return_type_success_(x) + #define _Field_size_(x) + #define _Field_size_opt_(x) + #define _Field_size_bytes_(x) + #define _Field_size_bytes_opt_(x) + +#endif // // Types // -#ifndef _MSC_VER -// the required intXX_t types are std on VC - windows as well, but we can't define -// them commonly b/c we get redefinition of basic types conflicts on WINNT. 
-#include -#endif - // odd void indirection #pragma push_macro( "VOID" ) #undef VOID -typedef void VOID; +typedef void VOID; #pragma pop_macro( "VOID" ) -typedef VOID * PVOID; - -// Boolean types -// - -// ESE's standard BOOL is 4 bytes, unlike bool which is 1 byte. This is used -// in a bunch of persisted structures and such, so changing it to bool is non- -// trivial. We will fix it at 4 bytes for now. Besides if you really wanted -// to save space, just use a bit-field. -#ifdef _MSC_VER - typedef int BOOL; -#else - typedef int32_t BOOL; -#endif - -// Another complication, the signed BOOL and C++ bool are unsuitable for bit fields -// of 1-bit size, due to the way C sign extends 1 to be 0xFFFFFFFF. This type is -// designed for usage in bit fields involving 4-byte types (INT, ULONG, etc) without -// these sign extension problems. -typedef unsigned int FLAG32; - -#define fFalse BOOL( 0 ) -#define fTrue BOOL( !0 ) +typedef VOID * PVOID; // String types +// We base our strings off of char from the C++ standard and wchar_t +typedef char CHAR; +typedef unsigned char UCHAR; +typedef _Null_terminated_ CHAR * PSTR; +typedef _Null_terminated_ const CHAR * PCSTR; +typedef wchar_t WCHAR; +typedef _Null_terminated_ WCHAR * PWSTR; +typedef _Null_terminated_ const WCHAR * PCWSTR; + +// We'll base our integral types on the types from stdint.h, a standards-defined file that's guaranteed to be +// equivalent on all platforms. 
// +#include <stdint.h> -typedef char CHAR; -typedef CHAR *LPSTR; - - -// Basic integer types +// Constant sized integer types // - -#ifdef _MSC_VER - typedef short SHORT, *PSHORT; - typedef unsigned short USHORT, *PUSHORT; - typedef int INT, *PINT; - typedef unsigned int UINT, *PUINT; - typedef long LONG, *PLONG; - typedef unsigned long ULONG, *PULONG; - typedef long long LONGLONG, *PLONGLONG; - typedef unsigned long long ULONGLONG, *PULONGLONG; -#else - // On most other platforms, int and long are 64-bit on 64-bit platforms, but the ESE format - // is dependent upon LONG being 32-bits. - typedef int16_t SHORT; - typedef uint16_t USHORT; - typedef int32_t INT; - typedef uint32_t UINT; - typedef int32_t LONG; - typedef uint32_t ULONG; - typedef int64_t LONGLONG; - typedef uint64_t ULONGLONG; -#endif - -// Machine word types +typedef int8_t INT8; +typedef uint8_t UINT8; +typedef int16_t INT16; +typedef uint16_t UINT16; +typedef int32_t INT32; +typedef uint32_t UINT32; +typedef int64_t INT64; +typedef uint64_t UINT64; + +// Variable sized integer types, based on pointer-sized integer types // +typedef intptr_t INT_PTR; +typedef uintptr_t UINT_PTR; +typedef uintptr_t SIZE_T; -typedef unsigned char BYTE, *PBYTE; -typedef USHORT WORD, *PWORD; -typedef ULONG DWORD, *PDWORD; -typedef ULONGLONG QWORD, *PQWORD; - -// Pointer types +// Idealized machine word types // +typedef UINT8 BYTE; +typedef UINT16 WORD; +typedef unsigned long DWORD; // Note the unsafe use of long. Currently required to interop with + // Windows headers without a bunch of casts. +typedef UINT64 QWORD; + +// ESE's standard BOOL is 4 bytes, unlike C++'s bool which is 1 byte. This is +// used in a bunch of persisted structures and such, so changing it to bool is +// non-trivial. We will fix it at 4 bytes. Besides if you really wanted to +// save space, just use a bit-field.
+typedef INT32 BOOL; +#define fFalse BOOL( 0 ) +#define fTrue BOOL( !0 ) -#if defined(_WIN64) - #ifdef _MSC_VER - typedef unsigned __int64 UNSIGNED_PTR; - typedef __int64 SIGNED_PTR; - #else // !_MSC_VER - typedef unsigned long UNSIGNED_PTR; - typedef long SIGNED_PTR; - #endif // _MSC_VER -#else - typedef unsigned long UNSIGNED_PTR; - typedef long SIGNED_PTR; -#endif - - -typedef LONGLONG LONG64; -typedef unsigned int DWORD32; -typedef unsigned int ULONG32; -typedef ULONGLONG ULONG64; - - -//typedef long long INT64; -//typedef unsigned long long UINT64; -#ifndef _MSC_VER - typedef long long __int64; -#endif - -#if defined(_WIN64) - #ifdef _MSC_VER - - typedef __int64 INT_PTR, *PINT_PTR; - typedef unsigned __int64 UINT_PTR, *PUINT_PTR; - - typedef __int64 LONG_PTR, *PLONG_PTR; - typedef unsigned __int64 ULONG_PTR, *PULONG_PTR; - - #else // !_MSC_VER - - typedef unsigned long ULONG_PTR; - typedef long LONG_PTR; - - #endif // _MSC_VER -#else - - typedef __w64 int INT_PTR, *PINT_PTR; - typedef __w64 unsigned int UINT_PTR, *PUINT_PTR; - - typedef __w64 long LONG_PTR, *PLONG_PTR; - typedef __w64 unsigned long ULONG_PTR, *PULONG_PTR; - -#endif - -typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; -typedef ULONG_PTR SIZE_T, *PSIZE_T; +// Another complication, the signed BOOL and C++ bool are unsuitable for bit fields +// of 1-bit size, due to the way C++ sign extends 1 to be 0xFFFFFFFF. This type is +// designed for usage in bit fields involving 4-byte types without these sign extension +// problems. +typedef UINT32 FLAG32; + +// A Note about UINT32 and LONG. +// ESE code has traditionally intermingled "int" based types and "long" based types. This +// is because, in Windows, both are 32 bit unsigned integral values. And it's totally safe +// since the compiler will coerce between the two AND the two have the same range, bit layout, etc. +// HOWEVER. +// Because they are different base types, the compiler thinks that (int *) and (long *) are +// too different to coerce.
That's why we have random casts between the two. The C++ spec +// says that it is permissible for a compiler to play tricks such that it actually isn't safe +// to just cast an "int *" to a "long *". And it's possible that systems such as GCC on Linux +// actually do play games that make that unsafe (even if they were the same number of bytes, +// which they aren't on Linux). But here, now, with MSVC and Windows, it's safe. Furthermore, +// we actually do it all the time. We're working towards a reorganization of our +// base types such that we don't mix and match those base types. Until that's done and clean, +// we need the following: + +// These eventually should only be in the OS directory. They're for places where it's mandatory +// to use "long" to interact with the OS in code that otherwise wishes to restrict itself to the +// "core" ESE data types defined in this file. The most likely places to use them will be in +// casts and in mirrored declarations of OS provided functions (i.e. not picked up from a header). +typedef long OS_WIN_LONG; +typedef unsigned long OS_WIN_ULONG; +typedef unsigned long OS_WIN_DWORD; +typedef long * OS_WIN_PLONG; +typedef unsigned long * OS_WIN_PULONG; +typedef unsigned long * OS_WIN_PDWORD; + +// We're going to get rid of these from this file as we reorganize to the base types above. +// This file holds the minimal type definitions that the "core" implementation of ESE may use. +// There are other "non-core" portions (like the perfmon code) that will be allowed to use +// a wider variety of types in order to interact with Windows. Those types will be defined +// elsewhere and only included where absolutely needed. +typedef INT16 SHORT; +typedef INT32 INT; +typedef INT64 LONG64; +typedef INT64 LONGLONG; +typedef UINT8 * PBYTE; +typedef UINT16 USHORT; +typedef UINT32 DWORD32; // Unlike DWORD, not really derived from "long" type. +typedef UINT32 UINT; +typedef UINT32 ULONG32; // Not really derived from "long" type.
+typedef UINT64 ULONG64; +typedef UINT64 ULONGLONG; +typedef UINT64 * PULONGLONG; +typedef INT_PTR LONG_PTR; // Not really derived from "long" type. +typedef INT_PTR SIGNED_PTR; +typedef UINT_PTR DWORD_PTR; // Not really derived from "long" type. +typedef UINT_PTR ULONG_PTR; // Not really derived from "long" type. +typedef UINT_PTR UNSIGNED_PTR; + +typedef long LONG; // Note the problematic "long" derived type. +typedef unsigned long ULONG; // Note the problematic "long" derived type. +typedef unsigned long * PULONG; // Note the problematic "long" derived type. + +typedef PSTR LPSTR; // The LP stands for "Long Pointer" which has been obsolete for decades. +typedef PCSTR LPCSTR; // The LP stands for "Long Pointer" which has been obsolete for decades. +typedef PWSTR LPWSTR; // The LP stands for "Long Pointer" which has been obsolete for decades. +typedef PCWSTR LPCWSTR; // The LP stands for "Long Pointer" which has been obsolete for decades. // Common project types // -typedef _Return_type_success_( return >= 0 ) INT ERR; - +typedef _Return_type_success_( return >= 0 ) INT32 ERR; // // Limits // -const USHORT usMin = 0x0000; -const USHORT usMax = 0xFFFF; -const LONG lMin = 0x80000000; -const LONG lMax = 0x7FFFFFFF; +constexpr UINT16 usMin = 0x0000; +constexpr UINT16 usMax = 0xFFFF; + +constexpr INT32 lMin = 0x80000000; +constexpr INT32 lMax = 0x7FFFFFFF; -const ULONG ulMin = 0x00000000; -const ULONG ulMax = 0xFFFFFFFF; +constexpr UINT32 ulMin = 0x00000000; +constexpr UINT32 ulMax = 0xFFFFFFFF; -const LONG64 llMin = 0x8000000000000000; -const LONG64 llMax = 0x7FFFFFFFFFFFFFFF; +constexpr INT64 llMin = 0x8000000000000000; +constexpr INT64 llMax = 0x7FFFFFFFFFFFFFFF; -const ULONG64 ullMin = 0x0000000000000000; -const ULONG64 ullMax = 0xFFFFFFFFFFFFFFFF; +constexpr UINT64 ullMin = 0x0000000000000000; +constexpr UINT64 ullMax = 0xFFFFFFFFFFFFFFFF; #if defined(_WIN64) -const UNSIGNED_PTR upMin = ullMin; -const UNSIGNED_PTR upMax = ullMax; +const UNSIGNED_PTR upMin = 
0x0000000000000000; +const UNSIGNED_PTR upMax = 0xFFFFFFFFFFFFFFFF; #else // !_WIN64 -const UNSIGNED_PTR upMin = ulMin; -const UNSIGNED_PTR upMax = ulMax; +const UNSIGNED_PTR upMin = 0x00000000; +const UNSIGNED_PTR upMax = 0xFFFFFFFF; #endif // _WIN64 -const QWORD bMax = 0xFF; -const QWORD wMax = 0xFFFF; -const QWORD dwMax = 0xFFFFFFFF; -const QWORD qwMax = 0xFFFFFFFFFFFFFFFF; +constexpr QWORD bMax = 0xFF; +constexpr QWORD wMax = 0xFFFF; +constexpr QWORD dwMax = 0xFFFFFFFF; +constexpr QWORD qwMax = 0xFFFFFFFFFFFFFFFF; + +// Explicit numeric values were used to emphasize differences visually, but let's make sure the values used +// match the expected symbolic values from standard headers. +static_assert( usMin == 0 ); +static_assert( usMax == UINT16_MAX ); +static_assert( lMin == INT32_MIN ); +static_assert( lMax == INT32_MAX ); +static_assert( ulMin == 0 ); +static_assert( ulMax == UINT32_MAX ); +static_assert( llMin == INT64_MIN ); +static_assert( llMax == INT64_MAX ); +static_assert( ullMin == 0 ); +static_assert( ullMax == UINT64_MAX ); +static_assert( upMin == 0 ); +static_assert( upMax == UINTPTR_MAX ); +static_assert( bMax == UINT8_MAX ); +static_assert( wMax == UINT16_MAX ); +static_assert( dwMax == UINT32_MAX ); +static_assert( qwMax == UINT64_MAX ); + // // Declarative Defines // -#ifndef _MSC_VER - - // Only the Microsoft VC++ in some build environment has alternate calling conventions as default at play and - // thus requires cdecl to be declared where we want the classic calling convention, so on we can just - // define this to nothing on UNIX (as everything is implicitly __cdecl there).
- #define __cdecl - #define __stdcall - -#endif // !_MSC_VER - - +// This will go away to be replaced with static_assert +#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1] // -// Map commonly used CRT like pseudo functions +// Call type overrides // -#ifndef _MSC_VER +#ifdef _MSC_VER + + // None - #define _stricmp strcasecmp +#else // !_MSC_VER -#endif // !_MSC_VER + // Only the Microsoft VC++ in some build environment has alternate calling conventions as default at play and + // thus requires cdecl to be declared where we want the classic calling convention, so we can just + // define this to nothing elsewhere (such as GCC on Unix, as everything is implicitly __cdecl there). + #define __cdecl + #define __stdcall +#endif // -// Basic "C operators" +// Map commonly used CRT like pseudo functions and basic "C" operators // #ifdef _MSC_VER -#define OffsetOf(s,m) (SIZE_T)&(((s *)0)->m) -#else -#define OffsetOf(s,m) __builtin_offsetof( s, m ) -#endif -#define CONTAINING_RECORD(address, type, field) ((type *)( \ - (PCHAR)(address) - \ - (ULONG_PTR)(&((type *)0)->field))) + #define OffsetOf(s,m) (SIZE_T)&(((s *)0)->m) + // No need for _stricmp, it's defined by MSVC + // No need for _countof, it's defined by MSVC + +#else // !_MSC_VER + + #define OffsetOf(s,m) __builtin_offsetof( s, m ) + #define _stricmp strcasecmp + #define _countof(rg) ( sizeof(rg) / sizeof(rg[0]) ) -#ifdef _MSC_VER -// No need - this set of operators (such as _countof()) is defined for MSVC tool set.
-#else -#define _countof(rg) ( sizeof(rg) / sizeof(rg[0]) ) #endif #define _cbrg(rg) ( _countof(rg) * sizeof(rg[0]) ) - // // Compiler warning control // @@ -315,21 +321,26 @@ const QWORD qwMax = 0xFFFFFFFFFFFFFFFF; #pragma warning ( disable : 4786 ) // we allow huge symbol names #ifdef DEBUG - #else // DEBUG + // None + #else // !DEBUG #pragma warning ( disable : 4189 ) // local variable is initialized but not referenced - #endif // !DEBUG + #endif #define Unused( var ) ( var ) -#endif // _MSC_VER +#else // !_MSC_VER + + // None + +#endif #if !defined(BEGIN_PRAGMA_OPTIMIZE_DISABLE) -#define BEGIN_PRAGMA_OPTIMIZE_DISABLE(flags, bug, reason) \ - __pragma(optimize(flags, off)) -#define BEGIN_PRAGMA_OPTIMIZE_ENABLE(flags, bug, reason) \ - __pragma(optimize(flags, on)) -#define END_PRAGMA_OPTIMIZE() \ - __pragma(optimize("", on)) + #define BEGIN_PRAGMA_OPTIMIZE_DISABLE(flags, bug, reason) \ + __pragma(optimize(flags, off)) + #define BEGIN_PRAGMA_OPTIMIZE_ENABLE(flags, bug, reason) \ + __pragma(optimize(flags, on)) + #define END_PRAGMA_OPTIMIZE() \ + __pragma(optimize("", on)) #endif diff --git a/dev/ese/published/inc/os/string.hxx b/dev/ese/published/inc/os/string.hxx index e6d2a6b7..24079f6d 100644 --- a/dev/ese/published/inc/os/string.hxx +++ b/dev/ese/published/inc/os/string.hxx @@ -8,18 +8,6 @@ // string with the singular exception of being passed a buffer that has zero length. 
#include -#ifndef PSTR -typedef _Null_terminated_ char * PSTR; /* ASCII string (char *) null terminated */ -#ifndef PCSTR -#endif -typedef _Null_terminated_ const char * PCSTR; /* const ASCII string (char *) null terminated */ -#ifndef PWSTR -#endif -typedef _Null_terminated_ wchar_t * PWSTR; /* Unicode string (char *) null terminated */ -#ifndef PCWSTR -#endif -typedef _Null_terminated_ const wchar_t * PCWSTR; /* const Unicode string (char *) null terminated */ -#endif #undef STRSAFE_NO_DEPRECATE diff --git a/dev/ese/published/inc/os/types.hxx b/dev/ese/published/inc/os/types.hxx index 3193ced6..8e4b4683 100644 --- a/dev/ese/published/inc/os/types.hxx +++ b/dev/ese/published/inc/os/types.hxx @@ -130,8 +130,6 @@ typedef DWORD LCID; typedef GUID SORTID; #define SORTIDNil { 0, 0, 0, { 0, 0, 0, 0, 0, 0, 0, 0 } } -typedef wchar_t WCHAR; - typedef struct { INT month; @@ -421,18 +419,6 @@ inline unsigned __int32 ReverseBytes< unsigned __int32 >( const unsigned __int32 return ReverseFourBytes( (const unsigned __int32) dw ); } -template<> -inline LONG ReverseBytes< LONG >( const LONG dw ) -{ - return ReverseFourBytes( (const unsigned __int32) dw ); -} - -template<> -inline ULONG ReverseBytes< ULONG >( const ULONG dw ) -{ - return ReverseFourBytes( (const unsigned __int32) dw ); -} - template<> inline __int64 ReverseBytes< __int64 >( const __int64 qw ) { From 3432f699e22696c6eb2745c94303f1542519b761 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Fri, 23 Sep 2022 17:20:39 +0000 Subject: [PATCH 032/102] Hybrid Shrink Phase 0.1: remove fully-flighted configuration options. This change removed the Shrink options below. 
Their default values are now TRUE everywhere: - JET_bitShrinkDatabaseDontMoveRootsOnAttach - JET_bitShrinkDatabaseDontTruncateLeakedPagesOnAttach - JET_bitShrinkDatabaseDontTruncateIndeterminatePagesOnAttach [Substrate:3b6466b2c6ca92426aad756045a0e3a65625111c] --- dev/ese/published/inc/jethdr.w | 10 ---------- dev/ese/src/ese/bf.cxx | 4 ++++ dev/ese/src/ese/db.cxx | 5 +---- dev/ese/src/ese/dbshrink.cxx | 14 +++---------- dev/ese/src/ese/fmp_test.cxx | 30 ---------------------------- dev/ese/src/ese/revertsnapshot.cxx | 2 +- dev/ese/src/inc/fmp.hxx | 18 ----------------- dev/ese/src/inc/revertsnapshot.h | 2 +- dev/ese/src/noncore/interop/grbits.h | 3 --- 9 files changed, 10 insertions(+), 78 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index 0506852e..2d8878d2 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -7378,16 +7378,6 @@ JetIdle( // shrinking the database, but avoids a potential small extra // cost afterwards, when operating on the shrunk database. -#define JET_bitShrinkDatabaseDontMoveRootsOnAttach 0x00000004 // Disable root moves when shrinking the database - // at attachment time. NOTE: temporary, for flighting only. - -#define JET_bitShrinkDatabaseDontTruncateLeakedPagesOnAttach 0x00000008 // Disable truncating leaked pages when shrinking the database - // at attachment time. NOTE: temporary, for flighting only. - -#define JET_bitShrinkDatabaseDontTruncateIndeterminatePagesOnAttach 0x00000010 // Disable truncating indeterminate/uncategorized pages when - // shrinking the database at attachment time. - // NOTE: temporary, for flighting only. 
- #endif // JET_VERSION >= 0x0A01 // begin_PubEsent diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index 75d55799..491797e1 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -4648,6 +4648,10 @@ ERR ErrBFFlush( IFMP ifmp, const OBJID objidFDP, const PGNO pgnoFirst, const PGN } while ( fRetryFlush ); + if ( g_rgfmp[ ifmp ].FRBSOn() && ( g_rgfmp[ ifmp ].PRBS()->ErrFlushAll() >= JET_errSuccess ) ) + { + g_rgfmp[ ifmp ].PRBS()->AssertAllFlushed(); + } const ERR errBfFlushLoop = err; OSTraceFMP( diff --git a/dev/ese/src/ese/db.cxx b/dev/ese/src/ese/db.cxx index 4fbf1342..b56a0680 100644 --- a/dev/ese/src/ese/db.cxx +++ b/dev/ese/src/ese/db.cxx @@ -1576,10 +1576,7 @@ ERR ErrDBParseDbParams( if ( ( pgrbitShrinkDatabaseOptions != NULL ) && ( ( *pgrbitShrinkDatabaseOptions & ~( JET_bitShrinkDatabaseEofOnAttach | - JET_bitShrinkDatabaseFullCategorizationOnAttach | - JET_bitShrinkDatabaseDontMoveRootsOnAttach | - JET_bitShrinkDatabaseDontTruncateLeakedPagesOnAttach | - JET_bitShrinkDatabaseDontTruncateIndeterminatePagesOnAttach ) ) != 0 ) ) + JET_bitShrinkDatabaseFullCategorizationOnAttach ) ) != 0 ) ) { return ErrERRCheck( JET_errInvalidGrbit ); } diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index 13589b42..7923958a 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -343,11 +343,10 @@ LOCAL ERR ErrSHKIMoveLastExtent( goto HandleError; } - // Check if indeterminate-page handling is enabled and supported. + // Check if indeterminate-page handling is supported. // Note that FMP::FEfvSupported() below checks for both DB and log versions, but JET_efvShelvedPages2 // only upgrades the DB version so technically, we wouldn't need to check for the log version. 
- if ( pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() || - !pfmp->FEfvSupported( JET_efvShelvedPages2 ) ) + if ( !pfmp->FEfvSupported( JET_efvShelvedPages2 ) ) { *psdr = sdrPageNotMovable; goto HandleError; @@ -364,13 +363,6 @@ LOCAL ERR ErrSHKIMoveLastExtent( *psdr = sdrUnexpected; goto HandleError; } - - // Check if leaked-page handling is enabled. - if ( pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ) - { - *psdr = sdrPageNotMovable; - goto HandleError; - } } CPAGE cpage; @@ -694,7 +686,7 @@ LOCAL ERR ErrSHKIMoveLastExtent( { Assert( !FSPSpaceCatSmallSpace( spcatfCurrent ) ); - if ( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() && pfmp->FEfvSupported( JET_efvRootPageMove ) ) + if ( pfmp->FEfvSupported( JET_efvRootPageMove ) ) { // Note that we currently only support moving all roots of a tree (root itself, OE and AE root) // at the same time. So depending on what kind of root we are processing, we need to pass the diff --git a/dev/ese/src/ese/fmp_test.cxx b/dev/ese/src/ese/fmp_test.cxx index 4f130c48..b8d9df8d 100644 --- a/dev/ese/src/ese/fmp_test.cxx +++ b/dev/ese/src/ese/fmp_test.cxx @@ -356,45 +356,15 @@ JETUNITTEST( FMP, NewAndWriteLatch ) // Verify shrink options. 
CHECK( !pfmp->FShrinkDatabaseEofOnAttach() ); CHECK( !pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); pfmp->SetShrinkDatabaseOptions( JET_bitShrinkDatabaseEofOnAttach ); CHECK( pfmp->FShrinkDatabaseEofOnAttach() ); CHECK( !pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); pfmp->SetShrinkDatabaseOptions( JET_bitShrinkDatabaseFullCategorizationOnAttach ); CHECK( !pfmp->FShrinkDatabaseEofOnAttach() ); CHECK( pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); - pfmp->SetShrinkDatabaseOptions( JET_bitShrinkDatabaseDontMoveRootsOnAttach ); - CHECK( !pfmp->FShrinkDatabaseEofOnAttach() ); - CHECK( !pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); - pfmp->SetShrinkDatabaseOptions( JET_bitShrinkDatabaseDontTruncateLeakedPagesOnAttach ); - CHECK( !pfmp->FShrinkDatabaseEofOnAttach() ); - CHECK( !pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); - pfmp->SetShrinkDatabaseOptions( JET_bitShrinkDatabaseDontTruncateIndeterminatePagesOnAttach ); - CHECK( !pfmp->FShrinkDatabaseEofOnAttach() ); - CHECK( 
!pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); pfmp->SetShrinkDatabaseOptions( NO_GRBIT ); CHECK( !pfmp->FShrinkDatabaseEofOnAttach() ); CHECK( !pfmp->FRunShrinkDatabaseFullCatOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontMoveRootsOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateLeakedPagesOnAttach() ); - CHECK( !pfmp->FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() ); // Verify shrink time quota. CHECK( -1 == pfmp->DtickShrinkDatabaseTimeQuota() ); diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index 2ee4258d..7d5ca70b 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -3564,7 +3564,7 @@ RBSCleaner::RBSCleaner( IRBSCleanerConfig* const prbscleanerconfig ) : CZeroInit( sizeof( RBSCleaner ) ), m_pinst( pinst ), - m_msigRBSCleanerStop( CSyncBasicInfo( _T("RBSCleaner::m_msigRBSCleanerStop" ) ) ), + m_msigRBSCleanerStop( CSyncBasicInfo( _T( "RBSCleaner::m_msigRBSCleanerStop" ) ) ), m_critRBSFirstValidGen( CLockBasicInfo( CSyncBasicInfo( szRBSFirstValidGen ), rankRBSFirstValidGen, 0 ) ), m_prbscleaneriooperator( prbscleaneriooperator ), m_prbscleanerstate( prbscleanerstate ), diff --git a/dev/ese/src/inc/fmp.hxx b/dev/ese/src/inc/fmp.hxx index ad61f9a1..c7337f99 100644 --- a/dev/ese/src/inc/fmp.hxx +++ b/dev/ese/src/inc/fmp.hxx @@ -768,9 +768,6 @@ public: VOID SetShrinkDatabaseSizeLimit( const CPG cpgShrinkDatabaseSizeLimit ); BOOL FShrinkDatabaseEofOnAttach() const; BOOL FRunShrinkDatabaseFullCatOnAttach() const; - BOOL FShrinkDatabaseDontMoveRootsOnAttach() const; - BOOL FShrinkDatabaseDontTruncateLeakedPagesOnAttach() const; - BOOL FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() const; LONG DtickShrinkDatabaseTimeQuota() const; CPG CpgShrinkDatabaseSizeLimit() const; 
// Leak reclaimer. @@ -1666,21 +1663,6 @@ INLINE BOOL FMP::FRunShrinkDatabaseFullCatOnAttach() const return ( m_grbitShrinkDatabaseOptions & JET_bitShrinkDatabaseFullCategorizationOnAttach ) != 0; } -INLINE BOOL FMP::FShrinkDatabaseDontMoveRootsOnAttach() const -{ - return ( m_grbitShrinkDatabaseOptions & JET_bitShrinkDatabaseDontMoveRootsOnAttach ) != 0; -} - -INLINE BOOL FMP::FShrinkDatabaseDontTruncateLeakedPagesOnAttach() const -{ - return ( m_grbitShrinkDatabaseOptions & JET_bitShrinkDatabaseDontTruncateLeakedPagesOnAttach ) != 0; -} - -INLINE BOOL FMP::FShrinkDatabaseDontTruncateIndeterminatePagesOnAttach() const -{ - return ( m_grbitShrinkDatabaseOptions & JET_bitShrinkDatabaseDontTruncateIndeterminatePagesOnAttach ) != 0; -} - INLINE LONG FMP::DtickShrinkDatabaseTimeQuota() const { return m_dtickShrinkDatabaseTimeQuota; diff --git a/dev/ese/src/inc/revertsnapshot.h b/dev/ese/src/inc/revertsnapshot.h index 56ebcd67..e5cf9960 100644 --- a/dev/ese/src/inc/revertsnapshot.h +++ b/dev/ese/src/inc/revertsnapshot.h @@ -743,7 +743,7 @@ class CRevertSnapshot VOID AssertAllFlushed() { Assert( m_cNextFlushSegment == m_cNextWriteSegment && - ( m_pActiveBuffer == NULL || m_pActiveBuffer->m_ibNextRecord <= sizeof(RBSSEGHDR) ) ); + ( m_pActiveBuffer == NULL || m_pActiveBuffer->m_ibNextRecord == 0 || m_pActiveBuffer->m_ibNextRecord == sizeof(RBSSEGHDR) ) ); } ERR ErrSetReadBuffer( ULONG iStartSegment ); diff --git a/dev/ese/src/noncore/interop/grbits.h b/dev/ese/src/noncore/interop/grbits.h index 9863d5ff..3314d270 100644 --- a/dev/ese/src/noncore/interop/grbits.h +++ b/dev/ese/src/noncore/interop/grbits.h @@ -421,9 +421,6 @@ MSINTERNAL enum class MJET_GRBIT StageProdBetaMode = 0x400000, ShrinkDatabaseEofOnAttach = 0x00000001, // Resizes the database file during its attachment. 
ShrinkDatabaseFullCategorizationOnAttach = 0x00000002, // Enables full space categorization when shrinking the database - ShrinkDatabaseDontMoveRootsOnAttach = 0x00000004, // Disable root moves when shrinking the database - ShrinkDatabaseDontTruncateLeakedPagesOnAttach = 0x00000008, // Disable truncating leaked pages when shrinking the database - ShrinkDatabaseDontTruncateIndeterminatePagesOnAttach = 0x00000010, // Disable truncating indeterminate/uncategorized pages when DumpMinimum = 0x00000001, DumpMaximum = 0x00000002, DumpCacheMinimum = 0x00000004, From c8bbdc782a39410e316506a1781261088e94be08 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Sat, 24 Sep 2022 01:22:58 +0000 Subject: [PATCH 033/102] ESE Block Cache: copy status This change exposes the caching state of an EDB file to copy status. This is implemented by tagging the EDB file with a particular alternate data stream indicating the caching status. This can then be easily checked for existence even if the EDB file is in use by another process. This state is queried via a helper method in BlockCache.Interop which is being (directly) used by Repl for the first time. 
[Substrate:43eb6c8a2e01ac5e6b7534ba5bdf1b5264829985] --- dev/ese/published/inc/os/osblockcache.hxx | 2 + .../interop/BlockCacheFactoryBase.h | 18 ++ .../interop/CBlockCacheFactoryWrapper.h | 5 + .../noncore/blockcache/interop/Factory.cpp | 5 + .../blockcache/interop/IBlockCacheFactory.h | 2 + dev/ese/src/os/blockcache/_factory.hxx | 39 +++ dev/ese/src/os/blockcache/_factorywrapper.hxx | 5 + dev/ese/src/os/blockcache/_filefilter.hxx | 21 ++ dev/ese/src/os/blockcache/_fsfilter.hxx | 270 ++++++++++-------- 9 files changed, 242 insertions(+), 125 deletions(-) diff --git a/dev/ese/published/inc/os/osblockcache.hxx b/dev/ese/published/inc/os/osblockcache.hxx index d70ddaf0..e5acdc79 100644 --- a/dev/ese/published/inc/os/osblockcache.hxx +++ b/dev/ese/published/inc/os/osblockcache.hxx @@ -1447,6 +1447,8 @@ class IBlockCacheFactory // bcf virtual ERR ErrDetachFile( _In_z_ const WCHAR* const wszFilePath, _In_opt_ const IBlockCacheFactory::PfnDetachFileStatus pfnDetachFileStatus, _In_opt_ const DWORD_PTR keyDetachFileStatus ) = 0; + + virtual ERR ErrIsCachedFile( _In_z_ const WCHAR* const wszFilePath, _Out_ BOOL* const pfCached ) = 0; }; class COSBlockCacheFactory diff --git a/dev/ese/src/noncore/blockcache/interop/BlockCacheFactoryBase.h b/dev/ese/src/noncore/blockcache/interop/BlockCacheFactoryBase.h index db24042b..2f7da335 100644 --- a/dev/ese/src/noncore/blockcache/interop/BlockCacheFactoryBase.h +++ b/dev/ese/src/noncore/blockcache/interop/BlockCacheFactoryBase.h @@ -581,6 +581,24 @@ namespace Internal throw EseException( err ); } } + + virtual bool IsCachedFile( String^ path ) + { + ERR err = JET_errSuccess; + BOOL fCached = fFalse; + + pin_ptr wszPath = PtrToStringChars( path ); + Call( Pi->ErrIsCachedFile( wszPath, &fCached ) ); + + return fCached ? 
true : false; + + HandleError: + if ( err < JET_errSuccess ) + { + throw EseException( err ); + } + return false; + } }; } } diff --git a/dev/ese/src/noncore/blockcache/interop/CBlockCacheFactoryWrapper.h b/dev/ese/src/noncore/blockcache/interop/CBlockCacheFactoryWrapper.h index 0ac5f224..a9e8c2fd 100644 --- a/dev/ese/src/noncore/blockcache/interop/CBlockCacheFactoryWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CBlockCacheFactoryWrapper.h @@ -230,6 +230,11 @@ namespace Internal { return ErrERRCheck( JET_wrnNyi ); } + + ERR ErrIsCachedFile( _In_z_ const WCHAR* const wszFilePath, _Out_ BOOL* const pfCached ) override + { + return ErrERRCheck( JET_wrnNyi ); + } }; } } diff --git a/dev/ese/src/noncore/blockcache/interop/Factory.cpp b/dev/ese/src/noncore/blockcache/interop/Factory.cpp index 08f9946c..2a0d131b 100644 --- a/dev/ese/src/noncore/blockcache/interop/Factory.cpp +++ b/dev/ese/src/noncore/blockcache/interop/Factory.cpp @@ -517,6 +517,11 @@ namespace Internal return factory->DetachFile( path, status ); } + static bool IsCachedFile( String^ path ) + { + return factory->IsCachedFile( path ); + } + private: ref class OSLayer : IDisposable diff --git a/dev/ese/src/noncore/blockcache/interop/IBlockCacheFactory.h b/dev/ese/src/noncore/blockcache/interop/IBlockCacheFactory.h index 15955210..b9803de0 100644 --- a/dev/ese/src/noncore/blockcache/interop/IBlockCacheFactory.h +++ b/dev/ese/src/noncore/blockcache/interop/IBlockCacheFactory.h @@ -152,6 +152,8 @@ namespace Internal delegate void DetachFileStatus( int i, int c ); void DetachFile( String^ path, DetachFileStatus^ status ); + + bool IsCachedFile( String^ path ); }; } } diff --git a/dev/ese/src/os/blockcache/_factory.hxx b/dev/ese/src/os/blockcache/_factory.hxx index 9bcc3aee..07116798 100644 --- a/dev/ese/src/os/blockcache/_factory.hxx +++ b/dev/ese/src/os/blockcache/_factory.hxx @@ -154,6 +154,8 @@ class COSBlockCacheFactoryImpl : public IBlockCacheFactory ERR ErrDetachFile( _In_z_ const WCHAR* const 
wszFilePath, _In_opt_ const IBlockCacheFactory::PfnDetachFileStatus pfnDetachFileStatus, _In_opt_ const DWORD_PTR keyDetachFileStatus ) override; + + ERR ErrIsCachedFile( _In_z_ const WCHAR* const wszFilePath, _Out_ BOOL* const pfCached ) override; }; INLINE ERR COSBlockCacheFactoryImpl::ErrCreateFileSystemWrapper( _Inout_ IFileSystemAPI** const ppfsapiInner, @@ -869,3 +871,40 @@ HandleError: delete pfsapi; return err; } + +INLINE ERR COSBlockCacheFactoryImpl::ErrIsCachedFile( _In_z_ const WCHAR* const wszFilePath, + _Out_ BOOL* const pfCached ) +{ + ERR err = JET_errSuccess; + IFileSystemAPI* pfsapi = NULL; + WCHAR wszStreamCachedPath[ IFileSystemAPI::cchPathMax ] = { 0 }; + + *pfCached = fFalse; + + // the file is cached if the alternate data stream exists + + Call( ErrOSFSCreate( NULL, &pfsapi ) ); + + Call( ErrOSStrCbCopyW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), wszFilePath ) ); + Call( ErrOSStrCbAppendW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), CFileSystemFilter::c_wszStreamCached ) ); + Call( pfsapi->ErrPathExists( wszStreamCachedPath, NULL ) ); + + *pfCached = fTrue; + +HandleError: + delete pfsapi; + if ( err < JET_errSuccess ) + { + switch ( err ) + { + case JET_errInvalidPath: + case JET_errBufferTooSmall: + case JET_errFileNotFound: + err = JET_errSuccess; + break; + } + + *pfCached = fFalse; + } + return err; +} diff --git a/dev/ese/src/os/blockcache/_factorywrapper.hxx b/dev/ese/src/os/blockcache/_factorywrapper.hxx index c6887aa4..1068f78a 100644 --- a/dev/ese/src/os/blockcache/_factorywrapper.hxx +++ b/dev/ese/src/os/blockcache/_factorywrapper.hxx @@ -254,6 +254,11 @@ class TBlockCacheFactoryWrapper return m_piInner->ErrDetachFile( wszFilePath, pfnDetachFileStatus, keyDetachFileStatus ); } + ERR ErrIsCachedFile( _In_z_ const WCHAR* const wszFilePath, _Out_ BOOL* const pfCached ) override + { + return m_piInner->ErrIsCachedFile( wszFilePath, pfCached ); + } + private: I* const m_piInner; diff --git 
a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 5c6e8a3c..4b91b080 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -403,6 +403,7 @@ class TFileFilter // ff return pff->ErrAttach( offsetsFirstWrite ); } ERR ErrAttach( _In_ const COffsets& offsetsFirstWrite ); + ERR ErrMarkAsNotCached(); ERR ErrGetConfiguredCache(); ERR ErrCacheOpenFailure( _In_ const char* const szFunction, _In_ const ERR errFromCall, @@ -3291,9 +3292,29 @@ HandleError: } delete pcfh; OSMemoryPageFree( pvData ); + if ( !m_pcfh ) + { + CallS( ErrMarkAsNotCached() ); + } return fPresumeAttached ? err : JET_errSuccess; } +template< class I > +ERR TFileFilter::ErrMarkAsNotCached() +{ + ERR err = JET_errSuccess; + WCHAR wszCachedFile[ IFileSystemAPI::cchPathMax ] = { 0 }; + WCHAR wszStreamCachedPath[ IFileSystemAPI::cchPathMax ] = { 0 }; + + Call( TFileFilter::ErrPath( wszCachedFile ) ); + Call( ErrOSStrCbCopyW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), wszCachedFile ) ); + Call( ErrOSStrCbAppendW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), CFileSystemFilter::c_wszStreamCached ) ); + Call( m_pfsf->ErrFileDelete( wszStreamCachedPath ) ); + +HandleError: + return err; +} + template< class I > ERR TFileFilter::ErrGetConfiguredCache() { diff --git a/dev/ese/src/os/blockcache/_fsfilter.hxx b/dev/ese/src/os/blockcache/_fsfilter.hxx index 9b468200..2d2e997e 100644 --- a/dev/ese/src/os/blockcache/_fsfilter.hxx +++ b/dev/ese/src/os/blockcache/_fsfilter.hxx @@ -461,6 +461,11 @@ class TFileSystemFilter // fsf ERR ErrWrapFile( _Inout_ IFileAPI** const ppfapiInner, _Out_ IFileAPI** const ppfapi ) override; + public: + + static const WCHAR c_wszStreamEverEligible[]; + static const WCHAR c_wszStreamCached[]; + private: void ReleaseFile( _In_opt_ CFilePathTableEntry* const pfpte, @@ -493,9 +498,13 @@ class TFileSystemFilter // fsf _In_ const IFileAPI::FileModeFlags fmf, _In_ CFilePathTableEntry* const 
pfpte, _Out_ CFileFilterReference** const ppffr ); + ERR ErrGetCachingConfiguration( _In_z_ const WCHAR* const wszKeyPath, + _In_ const IFileAPI::FileModeFlags fmf, + _Out_ ICachedFileConfiguration** const ppcfconfig, + _Out_ BOOL* const pfCachingEnabled ); ERR ErrTryMarkAsEverEligibleForCaching( _In_z_ const WCHAR* const wszAnyAbsPath, _In_ const BOOL fOverwriteExisting, - _In_ const BOOL fOpenExisting, + _In_ const BOOL fCachingEnabled, _Out_ BOOL* const pfMarked ); ERR ErrFileConfigure( _In_ CFileFilter* const pff, @@ -533,9 +542,7 @@ class TFileSystemFilter // fsf _In_opt_ const TFileSystemFilter::PfnDetachFileStatus pfnDetachFileStatus, _In_opt_ const DWORD_PTR keyDetachFileStatus, _Out_ CFileFilter** const ppff ); - ERR ErrEverEligibleForCaching( _In_z_ const WCHAR* const wszAnyAbsPath, - _In_ const IFileAPI::FileModeFlags fmf, - _Out_ BOOL* const pfEverEligible ); + ERR ErrMarkAsNotCached( _In_ CFileFilter* const pff ); ERR ErrFileOpenCacheHit( _In_ const IFileAPI::FileModeFlags fmf, _In_ const BOOL fCreate, _In_ const BOOL fCacheOpen, @@ -607,8 +614,6 @@ class TFileSystemFilter // fsf private: - static const WCHAR c_wszFileEverEligibleForCaching[]; - IFileSystemConfiguration* const m_pfsconfig; IFileIdentification* const m_pfident; ICacheTelemetry* const m_pctm; @@ -1080,6 +1085,12 @@ HandleError: return err; } +template< class I > +const WCHAR TFileSystemFilter::c_wszStreamEverEligible[] = L":788638d4-9b8c-4518-99a6-2512769b1676"; + +template< class I > +const WCHAR TFileSystemFilter::c_wszStreamCached[] = L":d9bf91f9-c773-403a-af3a-a2ade89607a4"; + template< class I > void TFileSystemFilter::ReleaseFile( _In_opt_ CFilePathTableEntry* const pfpte, _In_opt_ CSemaphore* const psem, @@ -1336,7 +1347,6 @@ ERR TFileSystemFilter::ErrFileCreateCacheMiss( _In_z_ const WCHAR* const _Out_ CFileFilterReference** const ppffr ) { ERR err = JET_errSuccess; - IBlockCacheConfiguration* pbcconfig = NULL; ICachedFileConfiguration* pcfconfig = NULL; BOOL fCachingEnabled = 
fFalse; BOOL fEverEligibleForCaching = fFalse; @@ -1348,32 +1358,17 @@ ERR TFileSystemFilter::ErrFileCreateCacheMiss( _In_z_ const WCHAR* const // get the caching configuration for this file - Call( ErrGetConfiguration( &pbcconfig ) ); - Call( pbcconfig->ErrGetCachedFileConfiguration( pfpte->WszKeyPath(), &pcfconfig ) ); - - // determine if caching is enabled for this file - - fCachingEnabled = pcfconfig->FCachingEnabled(); - - // we do not support caching temp files - - if ( fmf & IFileAPI::fmfTemporary ) - { - fCachingEnabled = fFalse; - } + Call( ErrGetCachingConfiguration( pfpte->WszKeyPath(), fmf, &pcfconfig, &fCachingEnabled ) ); // if caching is enabled for this file then attempt to mark it as ever cached // // NOTE: we cannot attach a file to the cache that is not marked as ever cached - if ( fCachingEnabled ) - { - Call( ErrTryMarkAsEverEligibleForCaching( wszAnyAbsPath, - ( fmf & IFileAPI::fmfOverwriteExisting ) != 0, - fFalse, - &fEverEligibleForCaching ) ); - fCreated = fEverEligibleForCaching; - } + Call( ErrTryMarkAsEverEligibleForCaching( wszAnyAbsPath, + ( fmf & IFileAPI::fmfOverwriteExisting ) != 0, + fCachingEnabled, + &fEverEligibleForCaching ) ); + fCreated = fEverEligibleForCaching; // create the file @@ -1418,66 +1413,124 @@ HandleError: return err; } +template< class I > +ERR TFileSystemFilter::ErrGetCachingConfiguration( _In_z_ const WCHAR* const wszKeyPath, + _In_ const IFileAPI::FileModeFlags fmf, + _Out_ ICachedFileConfiguration** const ppcfconfig, + _Out_ BOOL* const pfCachingEnabled ) +{ + ERR err = JET_errSuccess; + IBlockCacheConfiguration* pbcconfig = NULL; + ICachedFileConfiguration* pcfconfig = NULL; + const DWORD cwchAbsPathCachingFileMax = IFileSystemAPI::cchPathMax; + WCHAR wszAbsPathCachingFile[ cwchAbsPathCachingFileMax ] = { 0 }; + const DWORD cwchKeyPathCachingFileMax = IFileIdentification::cwchKeyPathMax; + WCHAR wszKeyPathCachingFile[ cwchKeyPathCachingFileMax ] = { 0 }; + ICacheConfiguration* pcconfig = NULL; + BOOL 
fCachingEnabled = fFalse; + + *ppcfconfig = NULL; + *pfCachingEnabled = fFalse; + + // get the caching configuration for this file + + Call( ErrGetConfiguration( &pbcconfig ) ); + Call( pbcconfig->ErrGetCachedFileConfiguration( wszKeyPath, &pcfconfig ) ); + if ( pcfconfig->FCachingEnabled() ) + { + pcfconfig->CachingFilePath( wszAbsPathCachingFile ); + if ( wszAbsPathCachingFile[ 0 ] ) + { + Call( m_pfident->ErrGetFileKeyPath( wszAbsPathCachingFile, wszKeyPathCachingFile ) ); + Call( pbcconfig->ErrGetCacheConfiguration( wszKeyPathCachingFile, &pcconfig ) ); + } + } + + // determine if caching is enabled for this file + + fCachingEnabled = ( fmf & fmfReadOnlyMask ) == 0 && + ( fmf & IFileAPI::fmfTemporary ) == 0 && + pcfconfig->FCachingEnabled() && + wszAbsPathCachingFile[ 0 ] && + pcconfig->FCacheEnabled() && pcconfig->CbMaximumSize() > 0; + + // return our outputs + + *ppcfconfig = pcfconfig; + pcfconfig = NULL; + *pfCachingEnabled = fCachingEnabled; + +HandleError: + delete pcconfig; + delete pcfconfig; + if ( err < JET_errSuccess ) + { + delete *ppcfconfig; + *ppcfconfig = NULL; + *pfCachingEnabled = fFalse; + } + return err; +} + template< class I > ERR TFileSystemFilter::ErrTryMarkAsEverEligibleForCaching( _In_z_ const WCHAR* const wszAnyAbsPath, _In_ const BOOL fOverwriteExisting, - _In_ const BOOL fOpenExisting, + _In_ const BOOL fCachingEnabled, _Out_ BOOL* const pfMarked ) { - ERR err = JET_errSuccess; - IFileFindAPI* pffapi = NULL; - BOOL fFolder = fFalse; - BOOL fReadOnly = fFalse; - QWORD cb = 0; - WCHAR wszAlternateDataStreamPath[ OSFSAPI_MAX_PATH ] = { 0 }; - IFileAPI* pfapiAlternateDataStream = NULL; + ERR err = JET_errSuccess; + WCHAR wszStreamEverEligiblePath[ IFileSystemAPI::cchPathMax ] = { 0 }; + IFileAPI* pfapiEverEligible = NULL; + WCHAR wszStreamCachedPath[ IFileSystemAPI::cchPathMax ] = { 0 }; + IFileAPI* pfapiCached = NULL; *pfMarked = fFalse; - if ( fOpenExisting ) + // if the ever eligible marker is already present then note that + + 
Call( ErrOSStrCbCopyW( wszStreamEverEligiblePath, _cbrg( wszStreamEverEligiblePath ), wszAnyAbsPath ) ); + Call( ErrOSStrCbAppendW( wszStreamEverEligiblePath, _cbrg( wszStreamEverEligiblePath ), c_wszStreamEverEligible ) ); + err = TFileSystemWrapper::ErrPathExists( wszStreamEverEligiblePath, NULL ); + if ( err == JET_errSuccess ) { - err = TFileSystemWrapper::ErrFileFind( wszAnyAbsPath, &pffapi ); - if ( err >= JET_errSuccess ) - { - err = pffapi->ErrNext(); - } + *pfMarked = fTrue; + } - if ( err == JET_errFileNotFound || err == JET_errInvalidPath ) - { - Error( ErrERRCheck( JET_errFileNotFound ) ); - } - Call( err ); + // if the ever eligible marker isn't present and caching is enabled then try to create it - Call( pffapi->ErrIsFolder( &fFolder ) ); - if ( fFolder ) - { - Error( JET_errSuccess ); - } + if ( err == JET_errFileNotFound ) + { + err = JET_errSuccess; - Call( pffapi->ErrIsReadOnly( &fReadOnly ) ); - if ( fReadOnly ) + if ( fCachingEnabled ) { - Error( JET_errSuccess ); + Call( TFileSystemWrapper::ErrFileCreate( wszStreamEverEligiblePath, + fOverwriteExisting ? IFileAPI::fmfOverwriteExisting : IFileAPI::fmfNone, + &pfapiEverEligible ) ); + *pfMarked = fTrue; } + } + Call( err ); + + // best effort mark the file as cached. 
we will remove this later if the file isn't actually cached + + Call( ErrOSStrCbCopyW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), wszAnyAbsPath ) ); + Call( ErrOSStrCbAppendW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), c_wszStreamCached ) ); + err = TFileSystemWrapper::ErrPathExists( wszStreamCachedPath, NULL ); + if ( err == JET_errFileNotFound ) + { + err = JET_errSuccess; - Call( pffapi->ErrSize( &cb, IFileAPI::filesizeLogical ) ); - if ( !cb ) + if ( fCachingEnabled ) { - Error( JET_errSuccess ); + Call( TFileSystemWrapper::ErrFileCreate( wszStreamCachedPath, IFileAPI::fmfNone, &pfapiCached ) ); } } - - Call( ErrOSStrCbCopyW( wszAlternateDataStreamPath, _cbrg( wszAlternateDataStreamPath ), wszAnyAbsPath ) ); - Call( ErrOSStrCbAppendW( wszAlternateDataStreamPath, _cbrg( wszAlternateDataStreamPath ), c_wszFileEverEligibleForCaching ) ); - Call( TFileSystemWrapper::ErrFileCreate( wszAlternateDataStreamPath, - fOverwriteExisting ? IFileAPI::fmfOverwriteExisting : IFileAPI::fmfNone, - &pfapiAlternateDataStream ) ); - - *pfMarked = fTrue; + Call( err ); HandleError: - delete pfapiAlternateDataStream; - delete pffapi; + delete pfapiCached; + delete pfapiEverEligible; if ( err < JET_errSuccess ) { switch ( err ) @@ -1489,8 +1542,6 @@ HandleError: err = JET_errSuccess; break; } - - *pfMarked = fFalse; } return err; } @@ -1703,37 +1754,24 @@ ERR TFileSystemFilter::ErrFileOpenAndConfigure( _In_z_ const WCHAR* con _In_opt_ const DWORD_PTR keyDetachFileStatus, _Out_ CFileFilter** const ppff ) { - ERR err = JET_errSuccess; - IBlockCacheConfiguration* pbcconfig = NULL; - ICachedFileConfiguration* pcfconfig = NULL; - BOOL fEverEligibleForCaching = fFalse; - CFileFilter* pff = NULL; + ERR err = JET_errSuccess; + ICachedFileConfiguration* pcfconfig = NULL; + BOOL fCachingEnabled = fFalse; + BOOL fEverEligibleForCaching = fFalse; + CFileFilter* pff = NULL; *ppff = NULL; // get the caching configuration for this file - Call( ErrGetConfiguration( &pbcconfig ) ); - 
Call( pbcconfig->ErrGetCachedFileConfiguration( pfpte->WszKeyPath(), &pcfconfig ) ); - - // determine if this file has ever been eligible for caching - - Call( ErrEverEligibleForCaching( wszAnyAbsPath, fmf, &fEverEligibleForCaching ) ); + Call( ErrGetCachingConfiguration( pfpte->WszKeyPath(), fmf, &pcfconfig, &fCachingEnabled ) ); - // if the file has never been eligible for caching then see if it should be marked as such - - if ( !fEverEligibleForCaching ) - { - // if caching is enabled for this file and we are opening it for RW access then attempt to mark it as ever - // cached - // - // NOTE: we cannot attach a file to the cache that is not marked as ever cached + // if caching is enabled for this file and we are opening it for RW access then attempt to mark it as ever + // cached + // + // NOTE: we cannot attach a file to the cache that is not marked as ever cached - if ( pcfconfig->FCachingEnabled() && ( fmf & fmfReadOnlyMask ) == 0 ) - { - Call( ErrTryMarkAsEverEligibleForCaching( wszAnyAbsPath, fFalse, fTrue, &fEverEligibleForCaching ) ); - } - } + Call( ErrTryMarkAsEverEligibleForCaching( wszAnyAbsPath, fFalse, fCachingEnabled, &fEverEligibleForCaching ) ); // open the file with the specified flags @@ -1770,37 +1808,18 @@ HandleError: } template< class I > -ERR TFileSystemFilter::ErrEverEligibleForCaching( _In_z_ const WCHAR* const wszAnyAbsPath, - _In_ const IFileAPI::FileModeFlags fmf, - _Out_ BOOL* const pfEverEligible ) +ERR TFileSystemFilter::ErrMarkAsNotCached( _In_ CFileFilter* const pff ) { - ERR err = JET_errSuccess; - WCHAR wszAlternateDataStreamPath[ OSFSAPI_MAX_PATH ] = { 0 }; - IFileAPI* pfapiAlternateDataStream = NULL; - - *pfEverEligible = fFalse; + ERR err = JET_errSuccess; + WCHAR wszCachedFile[ IFileSystemAPI::cchPathMax ] = { 0 }; + WCHAR wszStreamCachedPath[ IFileSystemAPI::cchPathMax ] = { 0 }; - Call( ErrOSStrCbCopyW( wszAlternateDataStreamPath, _cbrg( wszAlternateDataStreamPath ), wszAnyAbsPath ) ); - Call( ErrOSStrCbAppendW( 
wszAlternateDataStreamPath, _cbrg( wszAlternateDataStreamPath ), c_wszFileEverEligibleForCaching ) ); - Call( TFileSystemWrapper::ErrFileOpen( wszAlternateDataStreamPath, fmf, &pfapiAlternateDataStream ) ); - - *pfEverEligible = fTrue; + Call( pff->ErrPath( wszCachedFile ) ); + Call( ErrOSStrCbCopyW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), wszCachedFile ) ); + Call( ErrOSStrCbAppendW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), c_wszStreamCached ) ); + Call( TFileSystemWrapper::ErrFileDelete( wszStreamCachedPath ) ); HandleError: - delete pfapiAlternateDataStream; - if ( err < JET_errSuccess ) - { - switch ( err ) - { - case JET_errInvalidPath: - case JET_errBufferTooSmall: - case JET_errFileNotFound: - err = JET_errSuccess; - break; - } - - *pfEverEligible = fFalse; - } return err; } @@ -1941,7 +1960,7 @@ ERR TFileSystemFilter::ErrGetCache( _In_ CFileFilter* const &pc ); if ( err < JET_errSuccess ) { - Call( ErrCacheOpenFailure( pff, "OpenById", err, ErrERRCheck(JET_errDiskIO))); + Call( ErrCacheOpenFailure( pff, "OpenById", err, ErrERRCheck( JET_errDiskIO ) ) ); } // save the caching state @@ -2088,6 +2107,10 @@ ERR TFileSystemFilter::ErrDetachFile( _In_ CFileFilter* const Call( pff->Pc()->ErrClose( pff->Pcfh()->Volumeid(), pff->Pcfh()->Fileid(), pff->Pcfh()->Fileserial() ) ); + // mark the file as not cached + + Call( ErrMarkAsNotCached( pff ) ); + HandleError: OSMemoryPageFree( pvData ); if ( pdestagingfile ) @@ -2159,9 +2182,6 @@ void TFileSystemFilter::TermFilePathTable() } } -template< class I > -const WCHAR TFileSystemFilter::c_wszFileEverEligibleForCaching[] = L":788638d4-9b8c-4518-99a6-2512769b1676"; - INLINE typename CFilePathHash::NativeCounter CFilePathHash::CKeyEntry::Hash( const CFilePathHashKey& key ) { return CFilePathHash::NativeCounter( key.UiHash() ); From 88479c300f629dc408363728ef8551834da8171a Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Sat, 24 Sep 2022 06:10:42 +0000 Subject: [PATCH 034/102] ESE Block Cache: Perf: 
copy ctor for CCachedBlockSlot Use memcpy for improved performance. This impacts the CUpdateSlabVisitor which is used in the update path. [Substrate:5ffe7cc05887a6f027e601722d88cc0a29f370c7] --- dev/ese/published/inc/os/osblockcache.hxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dev/ese/published/inc/os/osblockcache.hxx b/dev/ese/published/inc/os/osblockcache.hxx index e5acdc79..c369ca10 100644 --- a/dev/ese/published/inc/os/osblockcache.hxx +++ b/dev/ese/published/inc/os/osblockcache.hxx @@ -944,6 +944,11 @@ class CCachedBlockSlot : public CCachedBlock { } + CCachedBlockSlot( _In_ const CCachedBlockSlot& other ) + { + memcpy( this, &other, sizeof( *this ) ); + } + QWORD IbSlab() const { return m_le_ibSlab; } ChunkNumber Chno() const { return m_le_chno; } SlotNumber Slno() const { return m_le_slno; } From fa005c17a61d40e5611063fbf04f395599f5874a Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 27 Sep 2022 03:37:07 +0000 Subject: [PATCH 035/102] ESE Block Cache: Copy Status fixes EBC introduced a regression where an attempt to access a file that was configured for caching but whose caching file storage was missing would fail with JET_errInvalidPath. This change addresses this regression and adds test coverage for this scenario plus other scenarios where caching is partially configured. We also did some additional code cleanup/clarification and hardening against corner case errors. 
[Substrate:1afa5ca69eea65233a8e2a07a7e982a641e1168f] --- dev/ese/src/os/blockcache/_filefilter.hxx | 10 ++++ dev/ese/src/os/blockcache/_fsfilter.hxx | 60 ++++++++++++++--------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 4b91b080..192aafd1 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -3312,6 +3312,16 @@ ERR TFileFilter::ErrMarkAsNotCached() Call( m_pfsf->ErrFileDelete( wszStreamCachedPath ) ); HandleError: + if ( err < JET_errSuccess ) + { + switch ( err ) + { + case JET_errInvalidPath: + case JET_errBufferTooSmall: + err = JET_errSuccess; + break; + } + } return err; } diff --git a/dev/ese/src/os/blockcache/_fsfilter.hxx b/dev/ese/src/os/blockcache/_fsfilter.hxx index 2d2e997e..9907a7fe 100644 --- a/dev/ese/src/os/blockcache/_fsfilter.hxx +++ b/dev/ese/src/os/blockcache/_fsfilter.hxx @@ -1441,8 +1441,12 @@ ERR TFileSystemFilter::ErrGetCachingConfiguration( _In_z_ const WCHAR* con pcfconfig->CachingFilePath( wszAbsPathCachingFile ); if ( wszAbsPathCachingFile[ 0 ] ) { - Call( m_pfident->ErrGetFileKeyPath( wszAbsPathCachingFile, wszKeyPathCachingFile ) ); - Call( pbcconfig->ErrGetCacheConfiguration( wszKeyPathCachingFile, &pcconfig ) ); + err = m_pfident->ErrGetFileKeyPath( wszAbsPathCachingFile, wszKeyPathCachingFile ); + if ( err >= JET_errSuccess ) + { + Call( pbcconfig->ErrGetCacheConfiguration( wszKeyPathCachingFile, &pcconfig ) ); + } + Call( err == JET_errInvalidPath ? 
JET_errSuccess : err ); } } @@ -1452,6 +1456,7 @@ ERR TFileSystemFilter::ErrGetCachingConfiguration( _In_z_ const WCHAR* con ( fmf & IFileAPI::fmfTemporary ) == 0 && pcfconfig->FCachingEnabled() && wszAbsPathCachingFile[ 0 ] && + pcconfig && pcconfig->FCacheEnabled() && pcconfig->CbMaximumSize() > 0; // return our outputs @@ -1495,38 +1500,37 @@ ERR TFileSystemFilter::ErrTryMarkAsEverEligibleForCaching( _In_z_ const WC { *pfMarked = fTrue; } + Call( err == JET_errFileNotFound ? JET_errSuccess : err ); // if the ever eligible marker isn't present and caching is enabled then try to create it - if ( err == JET_errFileNotFound ) + if ( !( *pfMarked ) && fCachingEnabled ) { - err = JET_errSuccess; - - if ( fCachingEnabled ) - { - Call( TFileSystemWrapper::ErrFileCreate( wszStreamEverEligiblePath, - fOverwriteExisting ? IFileAPI::fmfOverwriteExisting : IFileAPI::fmfNone, - &pfapiEverEligible ) ); - *pfMarked = fTrue; - } + err = TFileSystemWrapper::ErrFileCreate( wszStreamEverEligiblePath, + fOverwriteExisting ? IFileAPI::fmfOverwriteExisting : IFileAPI::fmfNone, + &pfapiEverEligible ); + Call( err == JET_errFileAlreadyExists ? JET_errSuccess : err ); + delete pfapiEverEligible; + pfapiEverEligible = fFalse; + *pfMarked = fTrue; } - Call( err ); // best effort mark the file as cached. 
we will remove this later if the file isn't actually cached - Call( ErrOSStrCbCopyW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), wszAnyAbsPath ) ); - Call( ErrOSStrCbAppendW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), c_wszStreamCached ) ); - err = TFileSystemWrapper::ErrPathExists( wszStreamCachedPath, NULL ); - if ( err == JET_errFileNotFound ) + if ( *pfMarked && fCachingEnabled ) { - err = JET_errSuccess; - - if ( fCachingEnabled ) + Call( ErrOSStrCbCopyW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), wszAnyAbsPath ) ); + Call( ErrOSStrCbAppendW( wszStreamCachedPath, _cbrg( wszStreamCachedPath ), c_wszStreamCached ) ); + err = TFileSystemWrapper::ErrPathExists( wszStreamCachedPath, NULL ); + if ( err == JET_errFileNotFound ) { - Call( TFileSystemWrapper::ErrFileCreate( wszStreamCachedPath, IFileAPI::fmfNone, &pfapiCached ) ); + err = TFileSystemWrapper::ErrFileCreate( wszStreamCachedPath, IFileAPI::fmfNone, &pfapiCached ); + Call( err == JET_errFileAlreadyExists ? 
JET_errSuccess : err ); + delete pfapiCached; + pfapiCached = fFalse; } + Call( err ); } - Call( err ); HandleError: delete pfapiCached; @@ -1537,8 +1541,6 @@ HandleError: { case JET_errInvalidPath: case JET_errBufferTooSmall: - case JET_errFileNotFound: - case JET_errFileAlreadyExists: err = JET_errSuccess; break; } @@ -1820,6 +1822,16 @@ ERR TFileSystemFilter::ErrMarkAsNotCached( _In_ CFileFilter* const pff ) Call( TFileSystemWrapper::ErrFileDelete( wszStreamCachedPath ) ); HandleError: + if ( err < JET_errSuccess ) + { + switch ( err ) + { + case JET_errInvalidPath: + case JET_errBufferTooSmall: + err = JET_errSuccess; + break; + } + } return err; } From 2982148943db5c9efa26d4ca87c28d17e74bc019 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Wed, 28 Sep 2022 15:15:17 +0000 Subject: [PATCH 036/102] ESE Block Cache: Perf: flush all state on a clean dismount to prevent repeated recovery on file accesses Perf data from prod shows that HA file checker is consuming significant CPU just accessing the database file header for a cached file. This is because the cache is not in a clean shutdown state and so it must recompute significant state on mount. This change causes us to attempt a best effort flush of all state (to the caching file, i.e. flash) on a clean dismount of the cache. This should be much cheaper than running recovery. 
[Substrate:2172d622faf9571b8e953d74524f6f2e3e1eee7a] --- .../src/os/blockcache/_hashedlrukcache.hxx | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 8b0cc7b6..789888fe 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -5256,12 +5256,30 @@ HandleError: template< class I > ERR THashedLRUKCache::ErrPrepareToDismount() { - ERR err = JET_errSuccess; + ERR err = JET_errSuccess; + JournalPosition jposReplay = jposInvalid; + JournalPosition jposDurableForWriteBack = jposInvalid; + JournalPosition jposDurable = jposInvalid; // flush our state for all files Call( ErrFlush() ); + // flush all our state + + Call( m_pj->ErrGetProperties( &jposReplay, &jposDurableForWriteBack, &jposDurable, NULL, NULL ) ); + if ( jposDurableForWriteBack < jposDurable ) + { + Call( ErrFlush() ); + Call( m_pj->ErrGetProperties( &jposReplay, &jposDurableForWriteBack, &jposDurable, NULL, NULL ) ); + } + if ( jposReplay < jposDurableForWriteBack ) + { + Call( ErrFlushAllState( jposDurableForWriteBack ) ); + Call( m_pj->ErrTruncate( jposDurableForWriteBack ) ); + Call( ErrFlush() ); + } + HandleError: return err; } @@ -5327,7 +5345,7 @@ ERR THashedLRUKCache::ErrDump( _In_ CPRINTF* const pcprintf ) { // analyze the journal - errAnalyzeJournal = ErrAnalyzeJournal(); + errAnalyzeJournal = ErrAnalyzeJournal( fTrue ); // dump the journal metadata From 1eed330fb8d77a271a234c37ebf01d7a37c80d0b Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Wed, 28 Sep 2022 17:22:48 +0000 Subject: [PATCH 037/102] Do not crash on cache-bypass lost flushes in the test pass if it heals after one retry. 
[Substrate:4023972055c7bbbfc66e99a3d0264e72923882c6] --- dev/ese/src/ese/io.cxx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/ese/src/ese/io.cxx b/dev/ese/src/ese/io.cxx index 551de695..ea8e2694 100644 --- a/dev/ese/src/ese/io.cxx +++ b/dev/ese/src/ese/io.cxx @@ -4194,7 +4194,7 @@ ERR FMP::ErrDBReadPages( // If we got an unexpected lost flush error, loop for a while to see if the page fixes itself. if ( ( err == JET_errReadLostFlushVerifyFailure ) && !FNegTest( fCorruptingWithLostFlush ) ) { - DWORD cRetriesMax = 0; + DWORD cRetriesMax = 0, cRetriesToAssert = 0; #ifdef DEBUG const CPG cpgActual = pgnoEnd - pgnoStart + 1; @@ -4205,10 +4205,14 @@ ERR FMP::ErrDBReadPages( } cRetriesMax = 100; + + // Unfortunately, there seems to be hardware which loses writes in our test pass pool of machines. + cRetriesToAssert = 2; #else // !DEBUG // FNegTest() always returns fFalse in RETAIL, so do not run the retry loop when running tests, otherwise // they will get confused with multiple events and take too long with 1 sec per lost flush detected. cRetriesMax = ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Worker" ) == 0 ) ? 
10 : 0; + cRetriesToAssert = 1; #endif // DEBUG if ( cRetriesMax > 0 ) @@ -4231,7 +4235,7 @@ ERR FMP::ErrDBReadPages( UtilSleep( 100 ); } - AssertTrack( fFalse, OSFormat( "UnexpectedLostFlush:%I32u:%d:%d", cRetries, err, errRetry ) ); + AssertTrack( cRetries < cRetriesToAssert, OSFormat( "UnexpectedLostFlush:%I32u:%d:%d", cRetries, err, errRetry ) ); } } From 79abae82b7b005e3edf0996b0acde21d879c9919 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Thu, 29 Sep 2022 00:06:41 +0000 Subject: [PATCH 038/102] Miscellaneous test fixes [Substrate:c879509489bba512114973ffb0989cd420e6e2fd] --- dev/ese/src/ese/sysinit.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/ese/src/ese/sysinit.cxx b/dev/ese/src/ese/sysinit.cxx index cb09e285..d14c0424 100644 --- a/dev/ese/src/ese/sysinit.cxx +++ b/dev/ese/src/ese/sysinit.cxx @@ -561,8 +561,8 @@ ERR INST::ErrINSTTerm( TERMTYPE termtype ) else { // allow for improper usage in test (e.g.: terminating the instance - // with an outstanding transaction). - if ( !FNegTest( fInvalidUsage ) ) + // with an outstanding transaction) or instance unavailable. + if ( !FNegTest( fInvalidUsage ) && !FInstanceUnavailable() ) { FCBAssertAllClean( this ); } From cc53cd525047eb2ffdd58ab3df258c50d10f0540 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Thu, 29 Sep 2022 01:29:51 +0000 Subject: [PATCH 039/102] Break up the RECIRetrieveTaggedColumnsRecTooBig14.1 FireWall so we can see what is going on.
[Substrate:091946580e1ec3520ca851b04a7658fae143e837] --- dev/ese/src/ese/fldext.cxx | 126 ++++++++++++++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/dev/ese/src/ese/fldext.cxx b/dev/ese/src/ese/fldext.cxx index 56908843..15ad4a6a 100644 --- a/dev/ese/src/ese/fldext.cxx +++ b/dev/ese/src/ese/fldext.cxx @@ -569,7 +569,131 @@ ERR ErrRECIRetrieveTaggedColumn( } if ( dataRec.Cb() < REC::cbRecordMin || dataRec.Cb() > REC::CbRecordMostCHECK( g_rgfmp[ pfcb->Ifmp() ].CbPage() ) ) { - FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1" ); + const LONG cbRec = dataRec.Cb(); + const LONG cbRecMost = REC::CbRecordMostCHECK( g_rgfmp[ pfcb->Ifmp() ].CbPage() ); + + if ( cbRec < REC::cbRecordMin ) + { + if ( cbRec == 0 ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.Cb0" ); + } + else if ( cbRec >= 0 && cbRec < REC::cbRecordMin ) + { + static_assert( REC::cbRecordMin == 4 ); + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.Cb1to3" ); + } + else if ( cbRec > -4 ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpToDword" ); + } + else if ( cbRec > -16 ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpTo16B" ); + } + else if ( cbRec > -( cbRecMost / 2 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpToHalfRecMost" ); + } + else if ( cbRec > -( cbRecMost ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpToRecMost" ); + } + else if ( cbRec > -( cbRecMost * 2 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpToRecMostX2" ); + } + else if ( cbRec > -( 64 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpTo64K" ); + } + else if ( cbRec > -( 128 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpTo128K" ); + } + else if ( cbRec > -( 1024 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpTo1M" ); + } + else if ( 
cbRec > -( 1024 * 1024 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpTo1G" ); + } + else if ( cbRec >= lMin ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooSmall14.1.NegativeCbByUpTo2G" ); + } + else + { + // This shouldn't be able to happen at all, unless dataRec was like being modified while we tested + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbTooSmallHow" ); // !!!? + } + } + else if ( cbRec > cbRecMost ) + { + if ( cbRec == cbRecMost ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbEqualsMost" ); + } + else if ( cbRec < ( cbRecMost + 4 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpToDwordOverMost" ); + } + else if ( cbRec < ( cbRecMost + 16 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo16BOverMost" ); + } + else if ( cbRec < ( cbRecMost + cbRecMost / 2 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo50PctOverMost" ); + } + else if ( cbRec < ( cbRecMost * 2 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpToX2OverMost" ); + } + else if ( cbRec < ( 64 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo64KOverMost" ); + } + else if ( cbRec < ( 128 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo128KOverMost" ); + } + else if ( cbRec < ( 1024 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo1MOverMost" ); + } + else if ( cbRec < ( 1024 * 1024 * 1024 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo1GOverMost" ); + } + else if ( cbRec < ( (ULONG)2 * 1024 * 1024 * 1024 - 1 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbUpTo2GOverMost" ); + } + else + { + // should be impossible, as the value is a LONG + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.CbGtrThan2GOverMost" ); + } + + if ( cbRecMost < 4000 ) + { + FireWall( "RECIRetrieveTaggedColumnsRecMostCuriouslySmall" ); + } + // yes this cbPage is too small for possibly other 
Exchange clients, but the whole branch is + // processing invalid data. + else if ( g_rgfmp[ pfcb->Ifmp() ].CbPage() != ( 32 * 1024 ) || cbRecMost < ( 32 * 1000 ) ) + { + FireWall( "RECIRetrieveTaggedColumnsRecMostOrPageSizeOdd" ); + } + } + else + { + // This shouldn't be able to happen at all, unless dataRec was like being modified while we tested + FireWall( "RECIRetrieveTaggedColumnsRecTooBig14.1.VoltaileValues" ); + } + return ErrERRCheck( JET_errDatabaseCorrupted ); } From 8d23f0fc432f0d5261db32a60c3a956ccd042bf8 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 29 Sep 2022 15:22:57 +0000 Subject: [PATCH 040/102] ESE Block Cache: add alloc pool for CThreadLocalStorage::CRequest (redo) Redo of !2199774 with fixed critsec ranks This should largely avoid heap interactions when tracking IOs for throttling/combination checks. Fixes required: - fix ranks for critsecs - add a bypass mode to TPool so that if it is torn down first by the CRT we don't crash releasing memory [Substrate:763c3bef1d73f775af6aabb63b1e22e4d2eef2b6] --- dev/ese/src/os/blockcache/_common.hxx | 17 ++++++++++--- dev/ese/src/os/blockcache/_filefilter.hxx | 29 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/dev/ese/src/os/blockcache/_common.hxx b/dev/ese/src/os/blockcache/_common.hxx index 58cb364d..8c1808eb 100644 --- a/dev/ese/src/os/blockcache/_common.hxx +++ b/dev/ese/src/os/blockcache/_common.hxx @@ -13,11 +13,11 @@ const INT rankThrottleContexts = 0; const INT rankThrottleContext = 0; const INT rankFileFilterReferences = 0; const INT rankCachedFileHash = 0; -const INT rankCacheThreadLocalStorage = 0; const INT rankClusterReferenceHash = 0; const INT rankClusterWrites = 0; const INT rankSlabWrites = 0; const INT rankSlabWriteBackHash = 0; +const INT rankCacheThreadLocalStorage = 1; const INT rankSlabHash = 1; const INT rankCachedBlockWriteCounts = 0; const INT rankCacheRepository = 0; @@ -341,7 +341,7 @@ class TPool { void* pv = NULL; - if ( 
s_state.m_il.PrevMost() ) + if ( s_state.FInit() && s_state.m_il.PrevMost() ) { s_state.m_crit.Enter(); @@ -393,6 +393,12 @@ class TPool *ppv = NULL; } + if ( !s_state.FInit() ) + { + Free_( pv ); + return; + } + if ( pv && cb >= sizeof( CHeader ) ) { pheader = new( pv ) CHeader( cb ); @@ -486,15 +492,19 @@ class TPool public: CState() - : m_crit( CLockBasicInfo( CSyncBasicInfo( "TPool::CState::m_crit" ), rankPool, 0 ) ) + : m_fInit( fTrue ), + m_crit( CLockBasicInfo( CSyncBasicInfo( "TPool::CState::m_crit" ), rankPool, 0 ) ) { } ~CState() { + m_fInit = fFalse; Release( m_il ); } + BOOL FInit() const { return m_fInit; } + static void Release( CInvasiveList& il ) { while ( CHeader* const pheader = il.PrevMost() ) @@ -506,6 +516,7 @@ class TPool } } + BOOL m_fInit; CCriticalSection m_crit; typename CCountedInvasiveList m_il; }; diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 192aafd1..5e781dc5 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -819,6 +819,11 @@ class TFileFilter // ff } } + static void Cleanup() + { + CRequest::Cleanup(); + } + protected: ~CThreadLocalStorage() @@ -828,6 +833,9 @@ class TFileFilter // ff private: +#pragma push_macro( "new" ) +#undef new + class CRequest { public: @@ -846,6 +854,19 @@ class TFileFilter // ff m_ilRequestsByIO.InsertAsPrevMost( this ); } + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); + } + VolumeId Volumeid() const { return m_volumeid; } FileId Fileid() const { return m_fileid; } FileSerial Fileserial() const { return m_fileserial; } @@ -867,6 +888,11 @@ class TFileFilter // ff CCountedInvasiveList& IlRequestsByIO() { return m_ilRequestsByIO; } + static void Cleanup() + { + CPool::Cleanup(); + } + private: const VolumeId m_volumeid; @@ -879,6 +905,8 @@ 
class TFileFilter // ff typename CCountedInvasiveList::CElement m_ileRequestsByIO; }; +#pragma pop_macro( "new" ) + private: ERR ErrRequest( _In_ IFileSystemConfiguration* const pfsconfig, @@ -2400,6 +2428,7 @@ TFileFilter::~TFileFilter() template void TFileFilter::Cleanup() { + CThreadLocalStorage::Cleanup(); CIOComplete::Cleanup(); CThreadLocalStorageRepository::Cleanup(); CThrottleContextRepository::Cleanup(); From 31215a40d8d233e30ab07aebc2b0150f4c925c1a Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 29 Sep 2022 15:23:34 +0000 Subject: [PATCH 041/102] ESE: JetRenameColumn errata JetRenameColumn has bad error handling where if an error occurs after it commits its trx then it will still try to rollback. This used to be silently ignored but now it causes an Enforce failure. This bad error handling has been fixed. JetRenameColumn does not work properly on a versioned column (e.g. recently added). Currently, we will crash with the above error. Previously, we would succeed in updating the catalog but the TDB wasn't updated and we would either return JET_errColumnNotFound or crash. We now explicitly reject this case with JET_errIllegalOperation. 
[Substrate:d01bbfebd903ad15e2618e0fa2a1663492891bb7] --- dev/ese/src/ese/cat.cxx | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index 9702d0f2..dc3a0757 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -10350,6 +10350,7 @@ ERR ErrCATRenameColumn( // ================================================================ { ERR err = JET_errSuccess; + BOOL fRollback = fFalse; const INT cbSzNameNew = (ULONG)strlen( szNameNew ) + 1; Assert( cbSzNameNew > 1 ); @@ -10365,7 +10366,8 @@ ERR ErrCATRenameColumn( BOOL fPrimaryIndexPlaceholder = fFalse; Assert( 0 == ppib->Level() ); - CallR( ErrDIRBeginTransaction( ppib, 34533, NO_GRBIT ) ); + Call( ErrDIRBeginTransaction( ppib, 34533, NO_GRBIT ) ); + fRollback = fTrue; objidTable = pfcbTable->ObjidFDP(); @@ -10388,13 +10390,18 @@ ERR ErrCATRenameColumn( Call( ErrERRCheck( JET_errColumnNotFound ) ); } + if ( FFIELDVersioned( pfield->ffield ) ) + { + Call( ErrERRCheck( JET_errIllegalOperation ) ); + } + pfcbTable->EnterDDL(); // put the new column name in the mempool // do this before getting the FIELD in case we re-arrange the mempool err = ptdbTable->MemPool().ErrAddEntry( (BYTE *)szNameNew, cbSzNameNew, &itagColumnNameNew ); - if( err < 0 ) + if ( err < 0 ) { pfcbTable->LeaveDDL(); Call( err ); @@ -10460,6 +10467,7 @@ ERR ErrCATRenameColumn( // once the commit succeeds, no errors can be generated Call( ErrDIRCommitTransaction( ppib, NO_GRBIT ) ); + fRollback = fFalse; pfcbTable->EnterDML(); @@ -10496,14 +10504,14 @@ ERR ErrCATRenameColumn( HandleError: - if( 0 != itagColumnNameNew ) + if ( 0 != itagColumnNameNew ) { pfcbTable->EnterDDL(); ptdbTable->MemPool().DeleteEntry( itagColumnNameNew ); pfcbTable->LeaveDDL(); } - if( err < 0 ) + if ( fRollback ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); } From 894afd3a643c8d523bfeb406d5caca799412bae5 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Thu, 29 Sep 2022 
17:56:51 +0000 Subject: [PATCH 042/102] Hybrid Shrink Phase 0.2: permanently enable lgposLastResize-based DB extension redo. [Substrate:266e492a43f1295347f5916f72259feb4020e013] --- dev/ese/src/ese/_log/logredo.cxx | 15 ++------------- dev/ese/src/ese/io.cxx | 5 ++--- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 3db3061a..f6cf1d3b 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -4710,7 +4710,6 @@ ERR LOG::ErrLGRISetupFMPFromAttach( IFMP ifmp = ifmpNil; RSTMAP* psrtmap = NULL; ULONG pctCachePriority = g_pctCachePriorityUnassigned; - JET_GRBIT grbitShrinkDatabaseOptions = NO_GRBIT; pifmp = pifmp ? pifmp : &ifmp; pirstmap = pirstmap ? pirstmap : &irstmap; @@ -4761,13 +4760,12 @@ ERR LOG::ErrLGRISetupFMPFromAttach( // and set below in the FMP. Once recovery is finished, the DB needs to go through // JetAttachDatabase anyways, so all DB parameters will be parsed and consumed then. // - Call( ErrDBParseDbParams( psrtmap ? psrtmap->rgsetdbparam : NULL, psrtmap ? psrtmap->csetdbparam : 0, NULL, // JET_dbparamDbSizeMaxPages (not used here). &pctCachePriority, // JET_dbparamCachePriority. - &grbitShrinkDatabaseOptions, // JET_dbparamShrinkDatabaseOptions. + NULL, // JET_dbparamShrinkDatabaseOptions (not used here). NULL, // JET_dbparamShrinkDatabaseTimeQuota (not used here). NULL, // JET_dbparamShrinkDatabaseSizeLimit (not used here). NULL, // JET_dbparamLeakReclaimerEnabled (not used here). 
@@ -4815,7 +4813,6 @@ ERR LOG::ErrLGRISetupFMPFromAttach( pfmpT->ResetDeferredAttach(); pfmpT->SetPctCachePriorityFmp( pctCachePriority ); - pfmpT->SetShrinkDatabaseOptions( grbitShrinkDatabaseOptions ); FMP::EnterFMPPoolAsWriter(); pfmpT->SetLogOn(); @@ -11055,12 +11052,6 @@ ERR LOG::ErrLGRIRedoExtendDB( const LREXTENDDB * const plrdbextension ) const BOOL fLgposLastResizeSet = ( CmpLgpos( lgposLastResize, lgposMin ) != 0 ); const INT icmpLgposLastVsCurrent = CmpLgpos( lgposLastResize, m_lgposRedo ); -#ifndef DEBUG - const BOOL fMaySkipOlderResize = fLgposLastResizeSet && pfmp->FShrinkDatabaseEofOnAttach(); -#else - const BOOL fMaySkipOlderResize = fLgposLastResizeSet; -#endif - Assert( !fLgposLastResizeSet || fLgposLastResizeSupported ); { OnDebug( PdbfilehdrReadOnly pdbfilehdr = pfmp->Pdbfilehdr() ); @@ -11072,9 +11063,7 @@ ERR LOG::ErrLGRIRedoExtendDB( const LREXTENDDB * const plrdbextension ) // that may have been initiated after the physical resizing of the file and the stamping of lgposLastResize to // the header, but before the logical file size is updated post-OE operation. In that case, not replaying a // matching lgposLastResize would leave the file with the smaller (logical) size captured by backup-start. 
- if ( fMaySkipOlderResize && - fLgposLastResizeSet && - ( icmpLgposLastVsCurrent > 0 ) ) + if ( fLgposLastResizeSet && ( icmpLgposLastVsCurrent > 0 ) ) { OSTraceFMP( ifmp, JET_tracetagSpaceManagement, OSFormat( "%hs: Skipping ExtendDB because we're replaying the initial required range and we haven't reached the last resize yet.", __FUNCTION__ ) ); diff --git a/dev/ese/src/ese/io.cxx b/dev/ese/src/ese/io.cxx index ea8e2694..70158456 100644 --- a/dev/ese/src/ese/io.cxx +++ b/dev/ese/src/ese/io.cxx @@ -1286,9 +1286,8 @@ ERR ErrIOResizeUpdateDbHdrLgposLast( const IFMP ifmp, const LGPOS& lgposLastResi } else if ( icmpLgposLastVsCurrent < 0 ) { - Assert( !pfmp->FShrinkDatabaseEofOnAttach() && - ( PinstFromIfmp( ifmp )->m_plog->FRecoveringMode() == fRecoveringRedo ) ); - + // we don't expect to ever go back under normal resize conditions. + Assert( fFalse ); } } From ed857b4eb989ff7656c2c09e8d99658d3054dcf0 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Fri, 30 Sep 2022 16:10:19 +0000 Subject: [PATCH 043/102] Hybrid Shrink Phase 0.3: improve DB Shrink optics. [Substrate:30f72de670af47aab3aa356f2cee5ac5a7b736fa] --- dev/ese/src/_res/jetmsg.mc | 12 ++- dev/ese/src/ese/dbshrink.cxx | 173 ++++++++++++++++++++++++----------- 2 files changed, 131 insertions(+), 54 deletions(-) diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index 9b387e57..fc64cb61 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -2513,11 +2513,15 @@ Pages shelved: %24 page(s).%n Pages unleaked: %25 page(s).%n Return code: %17%n Stop reason: %18%n +Small-space trees converted: %27 tree(s).%n +Root pages moved (regular, space): %28 page(s), %29 page(s).%n +Strictly-internal pages moved (regular, space): %30 page(s), %31 page(s).%n +Strictly-leaf pages moved (regular, space): %32 page(s), %33 page(s).%n Total time: %5 minute(s) and %6 second(s).%n Pct. time in extent maintenance: %19%%%n Pct. time in file truncation: %20%%%n Pct. 
time in page categorization: %21%%%n -Pct. time in data move: %22%%%n +Pct. time in page processing: %22%% (%26%% in page moves)%n Pct. remaining time: %23%%%n . @@ -2534,11 +2538,15 @@ Pages shelved: %24 page(s).%n Pages unleaked: %25 page(s).%n Error code: %17%n Stop reason: %18%n +Small-space trees converted: %27 tree(s).%n +Root pages moved (regular, space): %28 page(s), %29 page(s).%n +Strictly-internal pages moved (regular, space): %30 page(s), %31 page(s).%n +Strictly-leaf pages moved (regular, space): %32 page(s), %33 page(s).%n Total time: %5 minute(s) and %6 second(s).%n Pct. time in extent maintenance: %19%%%n Pct. time in file truncation: %20%%%n Pct. time in page categorization: %21%%%n -Pct. time in data move: %22%%%n +Pct. time in page processing: %22%% (%26%% in page moves)%n Pct. remaining time: %23%%%n . diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index 7923958a..e6741811 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -36,16 +36,33 @@ VOID SHKIShrinkEofTracingEnd( _Out_ CPRINTF** ppcprintfShrinkTraceRaw ) // Top-level shrink functions. // +typedef struct ShrinkExtMoveStats : public CZeroInit +{ + // .ctor + ShrinkExtMoveStats() : CZeroInit( sizeof( ShrinkExtMoveStats ) ) {} + + // Data. 
+ CPG cpgMoved; + CPG cpgShelved; + CPG cpgUnleaked; + CPG cpgRootMoved; + CPG cpgInternalMoved; + CPG cpgLeafMoved; + CPG cpgRootSpaceMoved; + CPG cpgInternalSpaceMoved; + CPG cpgLeafSpaceMoved; + ULONG cSmallSpaceTreesConverted; + HRT dhrtPageCategorization; + HRT dhrtPageProcessing; + HRT dhrtPageMoves; +} ShrinkExtMoveStats; + LOCAL ERR ErrSHKIMoveLastExtent( _In_ PIB* ppib, _In_ const IFMP ifmp, _In_ const HRT hrtStarted, _In_ CPRINTF* const pcprintfShrinkTraceRaw, - _Inout_ CPG* const pcpgMoved, - _Inout_ CPG* const pcpgShelved, - _Inout_ CPG* const pcpgUnleaked, - _Inout_ HRT* const pdhrtPageCategorization, - _Inout_ HRT* const pdhrtDataMove, + _Inout_ ShrinkExtMoveStats* const psems, _Out_ ShrinkDoneReason* const psdr, _Out_ PGNO* const ppgnoLastProcessed, _Out_ PGNO* const pgnoFirstFromLastExtentMoved, @@ -94,13 +111,9 @@ LOCAL ERR ErrSHKIMoveLastExtent( SpaceCatCtx* pSpCatCtx = NULL; BFLatch bfl; BOOL fPageLatched = fFalse; - BOOL fPageCategorization = fFalse; - BOOL fDataMove = fFalse; - HRT hrtPageCategorizationStart = 0; - HRT hrtDataMoveStart = 0; - CPG cpgMoved = 0; - CPG cpgShelved = 0; - CPG cpgUnleaked = 0; + BOOL fPageCategorization = fFalse, fPageProcessing = fFalse, fPageMove = fFalse; + HRT hrtPageCategorizationStart = 0, hrtPageProcessingStart = 0, hrtPageMoveStart = 0; + CPG cpgShelved = 0, cpgUnleaked = 0; PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortDbShrink ); @@ -160,10 +173,16 @@ LOCAL ERR ErrSHKIMoveLastExtent( { while ( pgnoCurrent <= pgnoLast ) { - if ( fDataMove ) + if ( fPageProcessing ) { - *pdhrtDataMove += DhrtHRTElapsedFromHrtStart( hrtDataMoveStart ); - fDataMove = fFalse; + psems->dhrtPageProcessing += DhrtHRTElapsedFromHrtStart( hrtPageProcessingStart ); + fPageProcessing = fFalse; + } + + if ( fPageMove ) + { + psems->dhrtPageMoves += DhrtHRTElapsedFromHrtStart( hrtPageMoveStart ); + fPageMove = fFalse; } // Resume to normal pass type if we're past the proper page or if @@ 
-252,7 +271,7 @@ LOCAL ERR ErrSHKIMoveLastExtent( &pSpCatCtx ) ); Assert( !FSPSpaceCatUnknown( spcatfCurrent ) ); // We should not get this here. Assert( !FSPSpaceCatNotOwnedEof( spcatfCurrent ) ); // We should not get this here because we're only processing known-owned pages. - *pdhrtPageCategorization += DhrtHRTElapsedFromHrtStart( hrtPageCategorizationStart ); + psems->dhrtPageCategorization += DhrtHRTElapsedFromHrtStart( hrtPageCategorizationStart ); fPageCategorization = fFalse; // It is not possible to handle these. The database is now effectively unshrinkable. @@ -298,8 +317,8 @@ LOCAL ERR ErrSHKIMoveLastExtent( *ppgnoLastProcessed = pgnoCurrent; *pspcatfLastProcessed = spcatfCurrent; - fDataMove = fTrue; - hrtDataMoveStart = HrtHRTCount(); + fPageProcessing = fTrue; + hrtPageProcessingStart = HrtHRTCount(); // Indeterminate and leaked pages share some common handling. if ( FSPSpaceCatIndeterminate( spcatfCurrent ) || FSPSpaceCatLeaked( spcatfCurrent ) ) @@ -630,6 +649,7 @@ LOCAL ERR ErrSHKIMoveLastExtent( } Call( ErrSPBurstSpaceTrees( pSpCatCtx->pfucb ) ); + psems->cSmallSpaceTreesConverted++; // Next iteration will see the same page as large space and move it appropriately. fMovedPage = fTrue; @@ -688,6 +708,9 @@ LOCAL ERR ErrSHKIMoveLastExtent( if ( pfmp->FEfvSupported( JET_efvRootPageMove ) ) { + fPageMove = fTrue; + hrtPageMoveStart = HrtHRTCount(); + // Note that we currently only support moving all roots of a tree (root itself, OE and AE root) // at the same time. So depending on what kind of root we are processing, we need to pass the // actual root of the tree. @@ -701,7 +724,9 @@ LOCAL ERR ErrSHKIMoveLastExtent( fMovedPage = fTrue; (*pcprintfShrinkTraceRaw)( "ShrinkMoveRoot[%I32u:%I32u:%I32u:%d]\r\n", objidCurrent, pgnoCurrent, pgnoFDP, (int)spcatfCurrent ); - cpgMoved += 3; // FDP + OE + AE. + psems->cpgMoved += 3; // FDP + OE + AE. 
+ psems->cpgRootMoved++; + psems->cpgRootSpaceMoved += 2; continue; } else @@ -716,6 +741,9 @@ LOCAL ERR ErrSHKIMoveLastExtent( { Assert( !FSPSpaceCatSmallSpace( spcatfCurrent ) ); + fPageMove = fTrue; + hrtPageMoveStart = HrtHRTCount(); + const BOOL fSpacePage = FSPSpaceCatAnySpaceTree( spcatfCurrent ); err = ErrBTPageMove( @@ -750,7 +778,29 @@ LOCAL ERR ErrSHKIMoveLastExtent( fMovedPage = fTrue; (*pcprintfShrinkTraceRaw)( "ShrinkMove[%I32u:%I32u:%d]\r\n", objidCurrent, pgnoCurrent, (int)spcatfCurrent ); - cpgMoved++; + psems->cpgMoved++; + if ( FSPSpaceCatStrictlyLeaf( spcatfCurrent ) ) + { + if ( fSpacePage ) + { + psems->cpgLeafSpaceMoved++; + } + else + { + psems->cpgLeafMoved++; + } + } + else + { + if ( fSpacePage ) + { + psems->cpgInternalSpaceMoved++; + } + else + { + psems->cpgInternalMoved++; + } + } continue; } @@ -762,10 +812,16 @@ LOCAL ERR ErrSHKIMoveLastExtent( goto HandleError; } // end while ( pgnoCurrent <= pgnoLast ) - if ( fDataMove ) + if ( fPageProcessing ) { - *pdhrtDataMove += DhrtHRTElapsedFromHrtStart( hrtDataMoveStart ); - fDataMove = fFalse; + psems->dhrtPageProcessing += DhrtHRTElapsedFromHrtStart( hrtPageProcessingStart ); + fPageProcessing = fFalse; + } + + if ( fPageMove ) + { + psems->dhrtPageMoves += DhrtHRTElapsedFromHrtStart( hrtPageMoveStart ); + fPageMove = fFalse; } // If we made all the way with a lookup pending, go back and re-evaluate. 
@@ -792,18 +848,25 @@ LOCAL ERR ErrSHKIMoveLastExtent( } // end while ( fTrue ) HandleError: - Assert( !( fPageCategorization && fDataMove ) ); + Assert( !( fPageMove && !fPageProcessing ) ); + Assert( !( fPageCategorization && fPageProcessing ) ); if ( fPageCategorization ) { - *pdhrtPageCategorization += DhrtHRTElapsedFromHrtStart( hrtPageCategorizationStart ); + psems->dhrtPageCategorization += DhrtHRTElapsedFromHrtStart( hrtPageCategorizationStart ); fPageCategorization = fFalse; } - if ( fDataMove ) + if ( fPageProcessing ) + { + psems->dhrtPageProcessing += DhrtHRTElapsedFromHrtStart( hrtPageProcessingStart ); + fPageProcessing = fFalse; + } + + if ( fPageMove ) { - *pdhrtDataMove += DhrtHRTElapsedFromHrtStart( hrtDataMoveStart ); - fDataMove = fFalse; + psems->dhrtPageMoves += DhrtHRTElapsedFromHrtStart( hrtPageMoveStart ); + fPageMove = fFalse; } if ( fPageLatched ) @@ -839,9 +902,8 @@ LOCAL ERR ErrSHKIMoveLastExtent( AssertTrack( cpgUnleaked <= cpgLastOE, "ShrinkMoveTooManyPagesUnleaked" ); } - *pcpgMoved += cpgMoved; - *pcpgShelved += cpgShelved; - *pcpgUnleaked += cpgUnleaked; + psems->cpgShelved += cpgShelved; + psems->cpgUnleaked += cpgUnleaked; return err; } @@ -867,15 +929,12 @@ ERR ErrSHKShrinkDbFromEof( QWORD cbSizeOwnedFinal = 0; CPRINTF* pcprintfShrinkTraceRaw = NULL; const HRT hrtStarted = HrtHRTCount(); - CPG cpgMoved = 0, cpgShelved = 0, cpgUnleaked = 0; + ShrinkExtMoveStats sems; ShrinkDoneReason sdr = sdrNone; PGNO pgnoFirstFromLastExtentShrunkPrev = pgnoNull; PGNO pgnoLastProcessed = pgnoNull; SpaceCategoryFlags spcatfLastProcessed = spcatfNone; - HRT dhrtExtMaint = 0; - HRT dhrtFileTruncation = 0; - HRT dhrtPageCategorization = 0; - HRT dhrtDataMove = 0; + HRT dhrtExtMaint = 0, dhrtFileTruncation = 0; BOOL fDbMayHaveChanged = fFalse; Assert( !pfmp->FIsTempDB() ); @@ -1014,11 +1073,7 @@ ERR ErrSHKShrinkDbFromEof( ifmp, hrtStarted, pcprintfShrinkTraceRaw, - &cpgMoved, - &cpgShelved, - &cpgUnleaked, - &dhrtPageCategorization, - &dhrtDataMove, 
+ &sems, &sdr, &pgnoLastProcessed, &pgnoFirstFromLastExtentMoved, @@ -1070,9 +1125,17 @@ ERR ErrSHKShrinkDbFromEof( HandleError: #ifdef DEBUG - Assert( cpgMoved >= 0 ); - Assert( cpgShelved >= 0 ); - Assert( cpgUnleaked >= 0 ); + Assert( sems.cpgMoved >= 0 ); + Assert( sems.cpgRootMoved >= 0 ); + Assert( sems.cpgRootSpaceMoved >= 0 ); + Assert( sems.cpgInternalMoved >= 0 ); + Assert( sems.cpgInternalSpaceMoved >= 0 ); + Assert( sems.cpgLeafMoved >= 0 ); + Assert( sems.cpgLeafSpaceMoved >= 0 ); + Assert( sems.cpgMoved == ( sems.cpgRootMoved + sems.cpgRootSpaceMoved + sems.cpgInternalMoved + sems.cpgInternalSpaceMoved + sems.cpgLeafMoved + sems.cpgLeafSpaceMoved ) ); + Assert( sems.cpgShelved >= 0 ); + Assert( sems.cpgUnleaked >= 0 ); + Assert( sems.cSmallSpaceTreesConverted >= 0 ); Assert( err != errSPNoSpaceBelowShrinkTarget ); if ( err == JET_wrnShrinkNotPossible ) { @@ -1150,9 +1213,10 @@ ERR ErrSHKShrinkDbFromEof( const double dblSecElapsed = dblSecTotalElapsed - (double)dwMinElapsed * 60.0; dhrtExtMaint = min( dhrtExtMaint, dhrtElapsed ); dhrtFileTruncation = min( dhrtFileTruncation, dhrtElapsed ); - dhrtPageCategorization = min( dhrtPageCategorization, dhrtElapsed ); - dhrtDataMove = min( dhrtDataMove, dhrtElapsed ); - HRT dhrtRemaining = dhrtElapsed - ( dhrtExtMaint + dhrtFileTruncation + dhrtPageCategorization + dhrtDataMove ); + sems.dhrtPageCategorization = min( sems.dhrtPageCategorization, dhrtElapsed ); + sems.dhrtPageProcessing = min( sems.dhrtPageProcessing, dhrtElapsed ); + sems.dhrtPageMoves = min( sems.dhrtPageMoves, dhrtElapsed ); + HRT dhrtRemaining = dhrtElapsed - ( dhrtExtMaint + dhrtFileTruncation + sems.dhrtPageCategorization + sems.dhrtPageProcessing ); dhrtRemaining = max( dhrtRemaining, 0 ); const WCHAR* rgwsz[] = { @@ -1162,16 +1226,21 @@ ERR ErrSHKShrinkDbFromEof( OSFormatW( L"%I64u", cbSizeFileFinal ), OSFormatW( L"%d", pfmp->CpgOfCb( cbSizeFileFinal ) ), OSFormatW( L"%I64u", cbSizeOwnedInitial ), OSFormatW( L"%d", pfmp->CpgOfCb( 
cbSizeOwnedInitial ) ), OSFormatW( L"%I64u", cbSizeOwnedFinal ), OSFormatW( L"%d", pfmp->CpgOfCb( cbSizeOwnedFinal ) ), - OSFormatW( L"%I64u", pfmp->CbOfCpg( cpgMoved ) ), OSFormatW( L"%d", cpgMoved ), + OSFormatW( L"%I64u", pfmp->CbOfCpg( sems.cpgMoved ) ), OSFormatW( L"%d", sems.cpgMoved ), OSFormatW( L"%d", err ), OSFormatW( L"%I32u:%d:0x%08I32x", pgnoLastProcessed, (int)sdr, (DWORD)spcatfLastProcessed ), OSFormatW( L"%.2f", ( 100.0 * (double)dhrtExtMaint ) / (double)dhrtElapsed ), OSFormatW( L"%.2f", ( 100.0 * (double)dhrtFileTruncation ) / (double)dhrtElapsed ), - OSFormatW( L"%.2f", ( 100.0 * (double)dhrtPageCategorization ) / (double)dhrtElapsed ), - OSFormatW( L"%.2f", ( 100.0 * (double)dhrtDataMove ) / (double)dhrtElapsed ), + OSFormatW( L"%.2f", ( 100.0 * (double)sems.dhrtPageCategorization ) / (double)dhrtElapsed ), + OSFormatW( L"%.2f", ( 100.0 * (double)sems.dhrtPageProcessing ) / (double)dhrtElapsed ), OSFormatW( L"%.2f", ( 100.0 * (double)dhrtRemaining ) / (double)dhrtElapsed ), - OSFormatW( L"%d", cpgShelved ), - OSFormatW( L"%d", cpgUnleaked ) + OSFormatW( L"%d", sems.cpgShelved ), + OSFormatW( L"%d", sems.cpgUnleaked ), + OSFormatW( L"%.2f", ( 100.0 * (double)sems.dhrtPageMoves ) / (double)dhrtElapsed ), + OSFormatW( L"%lu", sems.cSmallSpaceTreesConverted ), + OSFormatW( L"%d", sems.cpgRootMoved ), OSFormatW( L"%d", sems.cpgRootSpaceMoved ), + OSFormatW( L"%d", sems.cpgInternalMoved ), OSFormatW( L"%d", sems.cpgInternalSpaceMoved ), + OSFormatW( L"%d", sems.cpgLeafMoved ), OSFormatW( L"%d", sems.cpgLeafSpaceMoved ) }; UtilReportEvent( ( err < JET_errSuccess ) ? eventError : eventInformation, From db3072ef894a0b6cd0eabab11023a1f4972f6574 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 30 Sep 2022 23:10:53 +0000 Subject: [PATCH 044/102] ESE Block Cache: fix IFileIdentification::ErrGetFileId to ignore all unsupported scenarios ReFS volumes present a problem, these volumes generate a FileId that doesn't fit into the 8b FileId used by EBC. 
So we don't support ReFS at this time. Previously, we would error out if we saw storage that we don't support. We will now silently ignore any storage we cannot support and return the special value of volumeidInvalid/fileidInvalid to indicate that the file cannot participate in EBC either as a cached file or a caching file. [Substrate:04d39343b298c570225d292cde47dbcbfbb25390] --- .../src/os/blockcache/_fileidentification.hxx | 106 +++++++++--------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/dev/ese/src/os/blockcache/_fileidentification.hxx b/dev/ese/src/os/blockcache/_fileidentification.hxx index 06eb25ce..fd17d01c 100644 --- a/dev/ese/src/os/blockcache/_fileidentification.hxx +++ b/dev/ese/src/os/blockcache/_fileidentification.hxx @@ -388,13 +388,15 @@ ERR TFileIdentification::ErrGetFileId( _In_z_ const WCHAR* const wszP _Out_ VolumeId* const pvolumeid, _Out_opt_ FileId* const pfileid ) { - ERR err = JET_errSuccess; - HANDLE hObject = NULL; - BOOL fSuccess = FALSE; - FILE_ID_INFO fileIdInfo = { 0 }; - VolumeId volumeid = volumeidInvalid; - CVolumeHandleCacheEntry* pvhce = NULL; - FileId fileid = fileidInvalid; + ERR err = JET_errSuccess; + HANDLE hObject = NULL; + BOOL fSuccess = FALSE; + FILE_ID_INFO fileIdInfo = { 0 }; + VolumeId volumeidCandidate = volumeidInvalid; + FileId fileidCandidate = fileidInvalid; + CVolumeHandleCacheEntry* pvhce = NULL; + VolumeId volumeid = volumeidInvalid; + FileId fileid = fileidInvalid; *phFile = NULL; *pvolumeid = volumeidInvalid; @@ -436,63 +438,48 @@ ERR TFileIdentification::ErrGetFileId( _In_z_ const WCHAR* const wszP } } + // extract the volume id and file id + if ( fSuccess ) { - // get the volume id - - volumeid = (VolumeId)fileIdInfo.VolumeSerialNumber; - - // defend against illegal values for the volumeid - - if ( volumeid == volumeidInvalid ) - { - Error( ErrBlockCacheInternalError( wszPath, "InvalidVolumeId" ) ); - } - - // try to get the volume handle for the volume id. 
this will only work if it is a local volume - - err = ErrOpenVolumeById( volumeid, &pvhce ); - err = err == JET_errInvalidPath ? JET_errSuccess : err; - Call( err ); - - // get the file id if requested - - if ( pfileid ) - { - // if this is a local volume then return a normal file id - - if ( pvhce ) - { - // extract the file id - - fileid = *( (FileId*)fileIdInfo.FileId.Identifier ); + volumeidCandidate = (VolumeId)fileIdInfo.VolumeSerialNumber; + fileidCandidate = *((FileId*)fileIdInfo.FileId.Identifier); + } - // defend against illegal values for the file id + // try to get the volume handle for the volume id. this will only work if it is a local volume - if ( fileid == fileidInvalid ) - { - Error( ErrBlockCacheInternalError( wszPath, "InvalidFileId" ) ); - } + err = ErrOpenVolumeById( volumeidCandidate, &pvhce ); + err = err == JET_errInvalidPath ? JET_errSuccess : err; + Call( err ); - // defend against truncation of the file id because we cannot handle that + // if the file id is truncated then invalidate it because we cannot handle that - FILE_ID_128 fileId; - memset( fileId.Identifier, 0, _cbrg( fileId.Identifier ) ); - *( (FileId*)fileId.Identifier ) = fileid; - if ( memcmp( fileIdInfo.FileId.Identifier, fileId.Identifier, sizeof( fileId.Identifier ) ) ) - { - Error( ErrBlockCacheInternalError( wszPath, "TruncatedFileId" ) ); - } - } + FILE_ID_128 fileId; + memset( fileId.Identifier, 0, _cbrg( fileId.Identifier ) ); + *((FileId*)fileId.Identifier) = fileidCandidate; + if ( memcmp( fileIdInfo.FileId.Identifier, fileId.Identifier, sizeof( fileId.Identifier ) ) ) + { + fileidCandidate = fileidInvalid; + } - // if this is not a local volume then return an invalid file id to prevent caching + // if the volume id and file id are valid then return them, enabling caching for this file - else - { - volumeid = volumeidInvalid; - fileid = fileidInvalid; - } - } + if ( volumeidCandidate == volumeidInvalid ) + { + // if the volume id is invalid then we don't support a 
file id for this file + } + else if ( !pvhce ) + { + // if the volume is not a local volume then we don't support a file id for this file + } + else if ( fileidCandidate == fileidInvalid ) + { + // if the file id is invalid then we don't support a file id for this file + } + else + { + volumeid = volumeidCandidate; + fileid = fileidCandidate; } // return the object, volume id, and file id @@ -604,6 +591,13 @@ ERR TFileIdentification::ErrOpenVolumeByIdCacheMiss( _In_ cons *pwszPath = NULL; *phVolume = NULL; + // immediately reject any attempt to find volumeidInvalid + + if ( volumeid == volumeidInvalid ) + { + Error( ErrERRCheck( JET_errInvalidPath ) ); + } + // walk all volumes in the system and try to find one that matches the volume id hFindVolume = FindFirstVolumeW( wszVolume, cwchVolumeMax ); From 013cd6b51dff07efff9de64ec2328a0cbfc540d8 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Tue, 4 Oct 2022 18:46:05 +0000 Subject: [PATCH 045/102] Use OBJID to match FCB obtained by hashing pgnoFDP during leak report [Substrate:c2c327b36b5ef910604740bb682db5720c809aeb] --- dev/ese/src/ese/dbutil.cxx | 14 +++++++++++--- dev/ese/src/ese/fcb.cxx | 11 +++++++++-- dev/ese/src/inc/fcb.hxx | 2 +- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/dev/ese/src/ese/dbutil.cxx b/dev/ese/src/ese/dbutil.cxx index 663aef4c..ec57b4b1 100644 --- a/dev/ese/src/ese/dbutil.cxx +++ b/dev/ese/src/ese/dbutil.cxx @@ -5307,7 +5307,7 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) } else { - if ( ( err == JET_errRecordNotFound ) || ( err == JET_errNotInitialized ) ) + if ( ( err == JET_errRecordNotFound ) || ( err == JET_errNotInitialized ) || ( err == JET_errRecordDeleted ) ) { err = JET_errSuccess; } @@ -5343,8 +5343,16 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) { // We are probably racing with table deletion. 
FCBStateFlags fcbsf = fcbsfNone; - const BOOL fFoundFcb = ( FCB::PfcbFCBGet( ifmp, pgnoFDPLast, &fcbsf, fFalse /* fIncrementRefCount */, fTrue /* fInitForRecovery */ ) != pfcbNil ); - const BOOL fDeletePending = fFoundFcb && ( fcbsf & fcbsfDeletePending ); + OBJID objidFcb = objidNil; + const BOOL fFoundFcb = ( FCB::PfcbFCBGet( + ifmp, + pgnoFDPLast, + &fcbsf, + fFalse, // fIncrementRefCount + fTrue, // fInitForRecovery + &objidFcb ) != pfcbNil ) && + ( objidFcb == objidLast ); + const BOOL fDeletePending = fFoundFcb && ( ( fcbsf & fcbsfDeletePending ) != 0 ); if ( fFoundFcb ) { diff --git a/dev/ese/src/ese/fcb.cxx b/dev/ese/src/ese/fcb.cxx index 3c851132..cd866ad5 100644 --- a/dev/ese/src/ese/fcb.cxx +++ b/dev/ese/src/ese/fcb.cxx @@ -734,11 +734,12 @@ VOID FCB::UnlinkIDB( FCB *pfcbTable ) // NOTE: this is the proper channel for accessing an FCB; it uses the locking // protocol setup by the FCB hash-table and FCB latch -FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf, const BOOL fIncrementRefCount, const BOOL fInitForRecovery ) +FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf, const BOOL fIncrementRefCount, const BOOL fInitForRecovery, OBJID* const pobjid ) { FCBStateFlags fcbsf = fcbsfNone; INST *pinst = PinstFromIfmp( ifmp ); FCB *pfcbT; + OBJID objid = objidNil; FCBHash::ERR errFCBHash; FCBHash::CLock lockFCBHash; FCBHashKey keyFCBHash( ifmp, pgnoFDP ); @@ -881,7 +882,7 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const Assert( fcbsf == fcbsfNone ); fcbsf |= fcbsfInitialized; fcbsf |= ( pfcbT->FDeletePending() ? 
fcbsfDeletePending : fcbsfNone ); - + objid = pfcbT->ObjidFDP(); // If this is the dummy FCB created by recovery, we need to fully populate // it, make sure that the others wait while the first person finishes doing it @@ -910,6 +911,7 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const // try to get the FCB again fcbsf = fcbsfNone; + objid = objidNil; cRetries++; goto RetrieveFCB; @@ -929,11 +931,16 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const SetStateAndReturn: // set the state Assert( ( pfcbT == pfcbNil ) == ( fcbsf == fcbsfNone ) ); // Pointer and flag must agree. + Assert( ( pfcbT == pfcbNil ) == ( objid == objidNil ) ); // Pointer and OBJID must agree. Assert( ( fcbsf == fcbsfNone ) || ( fcbsf & fcbsfInitialized ) ); // Can't have any flags set if it's not initialized. if ( pfcbsf ) { *pfcbsf = fcbsf; } + if ( pobjid ) + { + *pobjid = objid; + } // return the FCB Assert( ( pfcbNil == pfcbT ) || ( pfcbT->IsUnlocked_( LOCK_TYPE::ltShared ) && pfcbT->IsUnlocked_( LOCK_TYPE::ltWrite ) ) ); diff --git a/dev/ese/src/inc/fcb.hxx b/dev/ese/src/inc/fcb.hxx index 52fa3ee5..8112f276 100644 --- a/dev/ese/src/inc/fcb.hxx +++ b/dev/ese/src/inc/fcb.hxx @@ -1064,7 +1064,7 @@ private: // ===================================================================== // FCB creation/deletion. 
public: - static FCB *PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf = NULL, const BOOL fIncrementRefCount = fTrue, const BOOL fInitForRecovery = fFalse ); + static FCB *PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf = NULL, const BOOL fIncrementRefCount = fTrue, const BOOL fInitForRecovery = fFalse, OBJID* const pobjid = NULL ); static ERR ErrCreate( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, FCB **ppfcb ); VOID CreateComplete_( ERR err, PCSTR szFile, const LONG lLine ); VOID PrepareForPurge( const BOOL fPrepareChildren = fTrue ); From 2d5b575f222c478d90615b4d694558b6aef8b954 Mon Sep 17 00:00:00 2001 From: Vakishan Date: Wed, 5 Oct 2022 02:06:41 +0000 Subject: [PATCH 046/102] Merge LR should skip adding page for reconciliation if macro begin is outside checkpoint [Substrate:40962734c3f190b52e567adcb6bd86b0df11bf7c] --- dev/ese/src/ese/_log/logredo.cxx | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index f6cf1d3b..0f8f0a51 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -2277,13 +2277,23 @@ LOCAL ERR ErrLGRIClearRedoMapDbtimeRevert( PIB* ppib, const LR* const plr, const { const LRMERGE_* const plrmerge = (LRMERGE_*)plr; - // Merge is always done inside a Macro - Assert( fMacroGoing ); - // Add it to the list of pages freed, if empty. if ( plrmerge->FEmptyPage() ) { - CallR( ppib->ErrInsertPgnoFreed( dbtime, ifmp, plrmerge->le_pgno ) ); + // Merge is always done inside a Macro + // If there is no macro for given dbtime, then macro begin must have been outside the checkpoint. + // We should be fine skipping reconciling such a page from dbtimerevert redomap as the macro would have had exclusive latch during merge + // and no other update on the page should be possible concurrently and shouldn't have been added to redomap. 
+ // + if ( !fMacroGoing ) + { + Assert( !pLogRedoMapToClear || !pLogRedoMapToClear->FPgnoSet( plrmerge->le_pgno ) ); + return JET_errSuccess; + } + else + { + CallR( ppib->ErrInsertPgnoFreed( dbtime, ifmp, plrmerge->le_pgno ) ); + } } break; } From 0a9f15ee5a15e76563f92249c7a57530310db326 Mon Sep 17 00:00:00 2001 From: TAW Date: Thu, 6 Oct 2022 02:28:18 +0000 Subject: [PATCH 047/102] Pull strsafe.h out of mainline code "strsafe.h" is OS specific. Move strsafe.h to src\os\string.cxx for ese.dll. Use the routines in string.hxx rather than strsafe.h. Remove any other uses of "strsafe.h" in those places that are already including string.hxx (there are a few places left that use strsafe.h but don't use the OS abstraction layer). Also, remove use of tchar.h. We never use it the way it was intended (i.e. compile twice: once UNICODE, once non-UNICODE). It's non-UNICODE (i.e. char) everywhere but one place. Doing this to explicitly separate out wchar_t usage in anticipation of replacing wchar_t with WCHAR and basing that on char16_t rather than wchar_t [Substrate:414567bf26e6037dac656a958a61d810851755f0] --- dev/ese/published/inc/os.hxx | 10 + dev/ese/published/inc/os/cprintf.hxx | 131 ++-- dev/ese/published/inc/os/edbg.hxx | 6 - dev/ese/published/inc/os/string.hxx | 268 ++++---- dev/ese/published/inc/os/thread.hxx | 11 +- dev/ese/published/inc/os/trace.hxx | 2 + dev/ese/src/_esefile/esefile.hxx | 7 - dev/ese/src/_perfctrs/perfdata.pl | 10 +- dev/ese/src/_xpress10/xpress10corsica.cxx | 2 +- dev/ese/src/_xpress10/xpress10sw.cxx | 2 +- dev/ese/src/checksum/checksumstd.hxx | 7 - dev/ese/src/ese/_log/logdump.cxx | 60 +- dev/ese/src/ese/_log/logread_legacy.cxx | 4 +- dev/ese/src/ese/_log/logstream.cxx | 2 +- dev/ese/src/ese/_log/logwrite.cxx | 6 +- dev/ese/src/ese/_osu/checksumu.cxx | 58 +- dev/ese/src/ese/_osu/osustd.hxx | 7 - dev/ese/src/ese/bf.cxx | 21 +- dev/ese/src/ese/cat.cxx | 44 +- dev/ese/src/ese/cpage.cxx | 135 ++-- dev/ese/src/ese/cresmgr.cxx | 4 +-
dev/ese/src/ese/dataserializer.cxx | 5 +- dev/ese/src/ese/dbscan.cxx | 14 +- dev/ese/src/ese/dbutil.cxx | 2 +- dev/ese/src/ese/fmp.cxx | 8 +- dev/ese/src/ese/io.cxx | 4 +- dev/ese/src/ese/jetapi.cxx | 9 +- dev/ese/src/ese/logapi.cxx | 14 +- dev/ese/src/ese/repair.cxx | 6 +- dev/ese/src/ese/revertsnapshot.cxx | 6 +- dev/ese/src/ese/sysver.cxx | 18 +- dev/ese/src/ese/ver.cxx | 5 +- dev/ese/src/eseutil/_edbutil.hxx | 7 - dev/ese/src/eseutil/dbspacedump.cxx | 11 +- dev/ese/src/eseutil/eseutil.cxx | 49 +- dev/ese/src/inc/_jet.hxx | 8 +- dev/ese/src/inc/_osu/syncu.hxx | 8 +- dev/ese/src/inc/cpage.hxx | 2 +- dev/ese/src/inc/daedef.hxx | 20 +- dev/ese/src/inc/dataserializer.hxx | 2 +- dev/ese/src/inc/esestd.hxx | 13 - dev/ese/src/inc/log.hxx | 4 +- dev/ese/src/inc/old.hxx | 2 +- dev/ese/src/inc/pib.hxx | 2 +- .../interop/CCacheConfigurationWrapper.h | 2 +- .../interop/CCachedFileConfigurationWrapper.h | 2 +- .../blockcache/interop/CFileFindWrapper.h | 2 +- .../interop/CFileIdentificationWrapper.h | 6 +- .../blockcache/interop/CFileSystemWrapper.h | 24 +- .../noncore/blockcache/interop/CFileWrapper.h | 2 +- .../src/noncore/blockcache/interop/Common.h | 2 - .../blockcache/interop/FileSystemBase.h | 4 +- dev/ese/src/noncore/eseshadow/esewriter.hxx | 8 - dev/ese/src/noncore/interop/jetinterop.cpp | 281 ++++---- dev/ese/src/os/_osfile.hxx | 2 +- dev/ese/src/os/_ostls.hxx | 2 +- dev/ese/src/os/cprintf.cxx | 379 +++++++---- dev/ese/src/os/edbg.cxx | 47 +- dev/ese/src/os/encrypt.cxx | 2 +- dev/ese/src/os/error.cxx | 34 +- dev/ese/src/os/memory.cxx | 2 +- dev/ese/src/os/osblockcache.cxx | 4 +- dev/ese/src/os/osfile.cxx | 7 +- dev/ese/src/os/osfs.cxx | 2 +- dev/ese/src/os/osstd_.hxx | 7 - dev/ese/src/os/ostimerqueue.cxx | 4 +- dev/ese/src/os/string.cxx | 601 +++++++++++------- dev/ese/src/os/thread.cxx | 5 +- dev/ese/src/os/trace.cxx | 4 +- dev/ese/src/sync/sync.cxx | 24 +- 70 files changed, 1341 insertions(+), 1134 deletions(-) diff --git a/dev/ese/published/inc/os.hxx 
b/dev/ese/published/inc/os.hxx index 8c693286..31144af3 100644 --- a/dev/ese/published/inc/os.hxx +++ b/dev/ese/published/inc/os.hxx @@ -261,4 +261,14 @@ BOOL FOSLayerUp(); //#define wcslen __USE_LOSSTRLENGTHW__ #endif +#if 0 +// These are too difficult to get working in test code and +// downstream build locations that include the OS abstraction +// but don't necessarily directly use it. So don't leave them +// turned on. However, leave them here to turn on in an +// ad-hoc manner. +#pragma deprecated( "wcscmp" ) +#pragma deprecated( "wcslen" ) +#endif + #endif // _OS_HXX_INCLUDED diff --git a/dev/ese/published/inc/os/cprintf.hxx b/dev/ese/published/inc/os/cprintf.hxx index 323db650..097d3cc4 100644 --- a/dev/ese/published/inc/os/cprintf.hxx +++ b/dev/ese/published/inc/os/cprintf.hxx @@ -6,7 +6,7 @@ #include - +#include // ================================================================================================================== class CPRINTF @@ -16,10 +16,10 @@ class CPRINTF CPRINTF() {} virtual ~CPRINTF() {} - static void SetThreadPrintfPrefix( _In_ const _TCHAR * szPrefix ); + static void SetThreadPrintfPrefix( _In_ const CHAR * szPrefix ); public: - virtual void __cdecl operator()( const _TCHAR* szFormat, ... ) = 0; + virtual void __cdecl operator()( const CHAR* szFormat, ... ) = 0; }; // ================================================================================================================== @@ -27,12 +27,12 @@ class CPRINTFNULL : public CPRINTF // ================================================================================================================== { public: - void __cdecl operator()( const _TCHAR* szFormat, ... ); + void __cdecl operator()( const CHAR* szFormat, ... ); static CPRINTF* PcprintfInstance(); }; // ================================================================ -INLINE void __cdecl CPRINTFNULL::operator()( const _TCHAR* szFormat, ... ) +INLINE void __cdecl CPRINTFNULL::operator()( const CHAR* szFormat, ... 
) // ================================================================ { va_list arg_ptr; @@ -58,7 +58,7 @@ class CPRINTFDBGOUT : public CPRINTF //- { public: - void __cdecl operator()( const _TCHAR* szFormat, ... ); + void __cdecl operator()( const CHAR* szFormat, ... ); static CPRINTF* PcprintfInstance(); }; @@ -68,7 +68,7 @@ class CPRINTFSTDOUT : public CPRINTF // ================================================================================================================== { public: - void __cdecl operator()( const _TCHAR* szFormat, ... ); + void __cdecl operator()( const CHAR* szFormat, ... ); static CPRINTF* PcprintfInstance(); }; @@ -81,12 +81,12 @@ INLINE CPRINTF* CPRINTFSTDOUT::PcprintfInstance() } // ================================================================ -INLINE void __cdecl CPRINTFSTDOUT::operator()( const _TCHAR* szFormat, ... ) +INLINE void __cdecl CPRINTFSTDOUT::operator()( const CHAR* szFormat, ... ) // ================================================================ { va_list arg_ptr; va_start( arg_ptr, szFormat ); - _vtprintf( szFormat, arg_ptr ); + vprintf( szFormat, arg_ptr ); va_end( arg_ptr ); } @@ -97,14 +97,14 @@ class CPRINTINTRINBUF : public CPRINTF { public: CPRINTINTRINBUF(); - + virtual void Reset(); virtual BOOL FContains( _In_z_ const CHAR * const szFind ); virtual ULONG CContains( _In_z_ const CHAR * const szFind ); void Print( CPRINTF & pcprintf ); - void __cdecl operator()( const _TCHAR* szFormat, ... ); + void __cdecl operator()( const CHAR* szFormat, ... ); private: // probably should templatize this buffer size. @@ -199,14 +199,14 @@ INLINE void CPRINTINTRINBUF::Reset() } // ================================================================ -INLINE void __cdecl CPRINTINTRINBUF::operator()( const _TCHAR* szFormat, ... ) +INLINE void __cdecl CPRINTINTRINBUF::operator()( const CHAR* szFormat, ... 
) // ================================================================ { CHAR rgchBuf[ 1024 ]; va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintfA( rgchBuf, sizeof( rgchBuf ), (CHAR*)szFormat, arg_ptr ); + OSStrCbVFormatA( rgchBuf, sizeof( rgchBuf ), (CHAR*)szFormat, arg_ptr ); va_end( arg_ptr ); Append_( rgchBuf ); @@ -298,9 +298,9 @@ INLINE void CPRINTINTRINBUF::Print( CPRINTF & cprintf ) while( ( szT = csr.SzNext() ) != NULL ) { if ( fSzId ) - cprintf( (_TCHAR*)"[%d] %hs", i, szT ); + cprintf( (CHAR*)"[%d] %hs", i, szT ); else - cprintf( (_TCHAR*)"%hs", szT ); + cprintf( (CHAR*)"%hs", szT ); i++; } } @@ -312,7 +312,7 @@ class CPRINTFDEBUG : public CPRINTF // ================================================================================================================== { public: - void __cdecl operator()( const _TCHAR* szFormat, ... ); + void __cdecl operator()( const CHAR* szFormat, ... ); static CPRINTF* PcprintfInstance(); }; @@ -325,12 +325,12 @@ INLINE CPRINTF* CPRINTFDEBUG::PcprintfInstance() } // ================================================================ -INLINE void __cdecl CPRINTFDEBUG::operator()( const _TCHAR* szFormat, ... ) +INLINE void __cdecl CPRINTFDEBUG::operator()( const CHAR* szFormat, ... ) // ================================================================ { va_list arg_ptr; va_start( arg_ptr, szFormat ); - _vtprintf( szFormat, arg_ptr ); + vprintf( szFormat, arg_ptr ); va_end( arg_ptr ); } @@ -343,35 +343,26 @@ class CPRINTFFILE : public CPRINTF // ================================================================================================================== { public: - CPRINTFFILE( const WCHAR* wszFile ); - ~CPRINTFFILE(); - - void __cdecl operator()( const _TCHAR* szFormat, ... ); + enum FILEENCODING + { + ASCII, + UTF16 + }; - private: - void* m_hFile; - void* m_hMutex; -}; + CPRINTFFILE( const WCHAR* wszFile, FILEENCODING eEncoding ); + ~CPRINTFFILE(); + + void __cdecl operator()( const CHAR* szFormat, ... 
); + void __cdecl operator()( const WCHAR * wszFormat, ... ); + + ERR m_errLast; -// ================================================================================================================== -class CWPRINTFFILE : public CPRINTF -// ================================================================================================================== -{ - public: - CWPRINTFFILE( const WCHAR* szFile ); - ~CWPRINTFFILE(); - - // If _UNICODE is defined, then we only want a single function. - // If _UNICODE is not defined, we need two different functions. -#ifndef _UNICODE - void __cdecl operator()( const _TCHAR* szFormat, ... ); -#endif - void __cdecl operator()( const wchar_t * wszFormat, ... ); - ERR m_errLast; - private: + void __cdecl CPRINTFFILE::PutBytesInFile_( BYTE *pb, ULONG cb ); + void __cdecl CPRINTFFILE::VerifyOnlyDOSTextFileLineReturns_( PCWSTR wsz ); void* m_hFile; void* m_hMutex; + FILEENCODING m_feEncodingType; }; // ================================================================================================================== @@ -379,51 +370,51 @@ class CPRINTFINDENT : public CPRINTF // ================================================================================================================== { public: - CPRINTFINDENT( CPRINTF* pcprintf, const _TCHAR* szPrefix = NULL ); - - void __cdecl operator()( const _TCHAR* szFormat, ... ); + CPRINTFINDENT( CPRINTF* pcprintf, const CHAR* szPrefix = NULL ); + + void __cdecl operator()( const CHAR* szFormat, ... 
); virtual void Indent(); virtual void Unindent(); - + protected: CPRINTFINDENT(); - + private: CPRINTF* const m_pcprintf; INT m_cindent; - const _TCHAR* const m_szPrefix; + const CHAR* const m_szPrefix; }; // ================================================================ -INLINE CPRINTFINDENT::CPRINTFINDENT( CPRINTF* pcprintf, const _TCHAR* szPrefix ) : +INLINE CPRINTFINDENT::CPRINTFINDENT( CPRINTF* pcprintf, const CHAR* szPrefix ) : // ================================================================ m_cindent( 0 ), m_pcprintf( pcprintf ), m_szPrefix( szPrefix ) { } - + // ================================================================ -INLINE void __cdecl CPRINTFINDENT::operator()( const _TCHAR* szFormat, ... ) +INLINE void __cdecl CPRINTFINDENT::operator()( const CHAR* szFormat, ... ) // ================================================================ { - _TCHAR rgchBuf[1024]; + CHAR rgchBuf[1024]; va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintf( rgchBuf, sizeof(rgchBuf), szFormat, arg_ptr ); + OSStrCbVFormatA( rgchBuf, sizeof(rgchBuf), szFormat, arg_ptr ); va_end( arg_ptr ); for( INT i = 0; i < m_cindent; i++ ) { - (*m_pcprintf)( _T( "\t" ) ); + (*m_pcprintf)( "\t" ); } if( m_szPrefix ) { - (*m_pcprintf)( _T( "%s" ), m_szPrefix ); + (*m_pcprintf)( "%s", m_szPrefix ); } - (*m_pcprintf)( _T( "%s" ), rgchBuf ); + (*m_pcprintf)( "%s", rgchBuf ); } // ================================================================ @@ -451,7 +442,7 @@ INLINE CPRINTFINDENT::CPRINTFINDENT( ) : m_szPrefix( 0 ) { } - + // ================================================================================================================== class CPRINTFTLSPREFIX : public CPRINTFINDENT @@ -463,17 +454,17 @@ class CPRINTFTLSPREFIX : public CPRINTFINDENT //- { public: - CPRINTFTLSPREFIX( CPRINTF* pcprintf, const _TCHAR * const szPrefix = NULL ); - - void __cdecl operator()( const _TCHAR* szFormat, ... 
); + CPRINTFTLSPREFIX( CPRINTF* pcprintf, const CHAR * const szPrefix = NULL ); + + void __cdecl operator()( const CHAR* szFormat, ... ); void Indent(); void Unindent(); - + private: CPRINTF* const m_pcprintf; INT m_cindent; - const _TCHAR* const m_szPrefix; + const CHAR* const m_szPrefix; }; @@ -482,25 +473,25 @@ class CPRINTFFN : public CPRINTF // ================================================================================================================== { public: - CPRINTFFN( INT (__cdecl *pfnPrintf)(const _TCHAR*, ... ) ) : m_pfnPrintf( pfnPrintf ) {} + CPRINTFFN( INT (__cdecl *pfnPrintf)(const CHAR*, ... ) ) : m_pfnPrintf( pfnPrintf ) {} ~CPRINTFFN() {} - void __cdecl operator()( const _TCHAR* szFormat, ... ) + void __cdecl operator()( const CHAR* szFormat, ... ) { - _TCHAR rgchBuf[1024]; - + CHAR rgchBuf[1024]; + va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintf(rgchBuf, sizeof(rgchBuf), szFormat, arg_ptr); + OSStrCbVFormatA(rgchBuf, sizeof(rgchBuf), szFormat, arg_ptr); va_end( arg_ptr ); - (*m_pfnPrintf)( _T( "%s" ), rgchBuf ); + (*m_pfnPrintf)( "%s", rgchBuf ); } private: - INT (__cdecl *m_pfnPrintf)( const _TCHAR*, ... ); + INT (__cdecl *m_pfnPrintf)( const CHAR*, ... ); }; - + // retrieves the current width of stdout diff --git a/dev/ese/published/inc/os/edbg.hxx b/dev/ese/published/inc/os/edbg.hxx index 342bf0fc..bed50ac0 100644 --- a/dev/ese/published/inc/os/edbg.hxx +++ b/dev/ese/published/inc/os/edbg.hxx @@ -38,12 +38,6 @@ struct EDBGGlobals // as a pointer of a different name. extern const EDBGGlobals * rgEDBGGlobalsArray; -HRESULT -EDBGPrintf( - _In_ PCSTR szFormat, - ... 
-) -; #endif // DEBUGGER_EXTENSION diff --git a/dev/ese/published/inc/os/string.hxx b/dev/ese/published/inc/os/string.hxx index 24079f6d..90abf952 100644 --- a/dev/ese/published/inc/os/string.hxx +++ b/dev/ese/published/inc/os/string.hxx @@ -11,94 +11,136 @@ #undef STRSAFE_NO_DEPRECATE -#ifndef _HRESULT_DEFINED -#define _HRESULT_DEFINED -typedef _Return_type_success_(return >= 0) LONG HRESULT; // required b/c we define/use the StringXxxXxxx() functions inline ... -#endif // _HRESULT_DEFINED - -#pragma prefast(push) -#pragma prefast(disable:28196, "Do not bother us with strsafe, someone else owns that.") -#pragma prefast(disable:28205, "Do not bother us with strsafe, someone else owns that.") -#include "strsafe.h" -#pragma prefast(pop) - #include -// get the length of the string -ERR ErrFromStrsafeHr ( HRESULT hr ); - -LONG LOSStrLengthA( _In_ PCSTR const sz ); -LONG LOSStrLengthW( _In_ PCWSTR const wsz ); -LONG LOSStrLengthUnalignedW( _In_ const UnalignedLittleEndian< WCHAR > * wsz ); -LONG LOSStrLengthMW( _In_ PCWSTR const wsz ); - -// copy a string - -#define ErrOSStrCbCopyA( szDst, cbDst, szSrc) ErrFromStrsafeHr( StringCbCopyA( szDst, cbDst, szSrc) ) -#define ErrOSStrCbCopyW( wszDst, cbDst, wszSrc) ErrFromStrsafeHr( StringCbCopyW( wszDst, cbDst, wszSrc) ) -#if DBG -#define OSStrCbCopyA( szDst, cbDst, szSrc ) { if(ErrOSStrCbCopyA( szDst, cbDst, szSrc )){ AssertSz( fFalse, "Success expected"); } } -#define OSStrCbCopyW( wszDst, cbDst, wszSrc ) { if(ErrOSStrCbCopyW( wszDst, cbDst, wszSrc )){ AssertSz( fFalse, "Success expected"); } } -#else -#define OSStrCbCopyA( szDst, cbDst, szSrc ) ErrOSStrCbCopyA( szDst, cbDst, szSrc ) -#define OSStrCbCopyW( wszDst, cbDst, wszSrc ) ErrOSStrCbCopyW( wszDst, cbDst, wszSrc ) -#endif +// +// get the length of the string in characters. +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. 
+LONG LOSStrLengthA( + _In_ PCSTR const sz, + _In_ ULONG cchMax = ulMax ); +LONG LOSStrLengthW( + _In_ PCWSTR const wsz, + _In_ ULONG cchMax = ulMax ); +LONG LOSStrLengthUnalignedW( + _In_ const UnalignedLittleEndian< WCHAR > * wsz, + _In_ ULONG cchMax = ulMax ); +LONG LOSStrLengthMW( + _In_ PCWSTR const wsz ); + +// +// copy a string up to a maximum byte count. +ERR ErrOSStrCbCopyA( + _In_ PSTR szDst, + _In_ SIZE_T cbDst, + _In_ PCSTR szSrc ); +ERR ErrOSStrCbCopyW( + _In_ PWSTR szDst, + _In_ SIZE_T cbDst, + _In_ PCWSTR szSrc ); +#define OSStrCbCopyA( szDst, cbDst, szSrc ) \ + { if(ErrOSStrCbCopyA( szDst, cbDst, szSrc )){ AssertSz( fFalse, "Success expected"); } } +#define OSStrCbCopyW( wszDst, cbDst, wszSrc ) \ + { if(ErrOSStrCbCopyW( wszDst, cbDst, wszSrc )){ AssertSz( fFalse, "Success expected"); } } // append a string - -#define ErrOSStrCbAppendA( szDst, cbDst, szSrc ) ErrFromStrsafeHr( StringCbCatA( szDst, cbDst, szSrc ) ) -#define ErrOSStrCbAppendW( wszDst, cbDst, wszSrc ) ErrFromStrsafeHr( StringCbCatW( wszDst, cbDst, wszSrc ) ) -#if DBG -#define OSStrCbAppendA( szDst, cbDst, szSrc ) { if(ErrOSStrCbAppendA( szDst, cbDst, szSrc )){ AssertSz( fFalse, "Success expected"); } } -#define OSStrCbAppendW( wszDst, cbDst, wszSrc ) { if(ErrOSStrCbAppendW( wszDst, cbDst, wszSrc )){ AssertSz( fFalse, "Success expected"); } } -#else -#define OSStrCbAppendA( szDst, cbDst, szSrc ) ErrOSStrCbAppendA( szDst, cbDst, szSrc ) -#define OSStrCbAppendW( wszDst, cbDst, wszSrc ) ErrOSStrCbAppendW( wszDst, cbDst, wszSrc ) -#endif - +ERR ErrOSStrCbAppendA( + _In_ PSTR szDst, + _In_ SIZE_T cbDst, + _In_ PCSTR szSrc ); +ERR ErrOSStrCbAppendW( + _In_ PWSTR wszDst, + _In_ SIZE_T cbDst, + _In_ PCWSTR wszSrc ); +#define OSStrCbAppendA( szDst, cbDst, szSrc ) \ + { if( ErrOSStrCbAppendA( szDst, cbDst, szSrc ) ){ AssertSz( fFalse, "Success expected"); } } +#define OSStrCbAppendW( wszDst, cbDst, wszSrc ) \ + { if( ErrOSStrCbAppendW( wszDst, cbDst, wszSrc ) ){ AssertSz( fFalse, "Success 
expected"); } } + +// // compare the strings (up to the given maximum length). if the first string // is "less than" the second string, -1 is returned. if the strings are "equal", // 0 is returned. if the first string is "greater than" the second string, +1 is returned. - -LONG LOSStrCompareA( _In_ PCSTR const pszStr1, _In_ PCSTR const pszStr2, _In_ const ULONG cchMax = ~ULONG( 0 ) ); -LONG LOSStrCompareW( _In_ PCWSTR const pwszStr1, _In_ PCWSTR const pwszStr2, _In_ const ULONG cchMax = ~ULONG( 0 ) ); - - -// create a formatted string in a given buffer - -void __cdecl OSStrCbVFormatA ( __out_bcount(cbBuffer) PSTR szBuffer, size_t cbBuffer, __format_string PCSTR szFormat, va_list alist ); -void __cdecl OSStrCbFormatA ( __out_bcount(cbBuffer) PSTR szBuffer, size_t cbBuffer, __format_string PCSTR szFormat, ...); -void __cdecl OSStrCbFormatW ( __out_bcount(cbBuffer) PWSTR szBuffer, size_t cbBuffer, __format_string PCWSTR szFormat, ...); -ERR __cdecl ErrOSStrCbFormatA ( __out_bcount(cbBuffer) PSTR szBuffer, size_t cbBuffer, __format_string PCSTR szFormat, ...); -ERR __cdecl ErrOSStrCbFormatW ( __out_bcount(cbBuffer) PWSTR szBuffer, size_t cbBuffer, __format_string PCWSTR szFormat, ...); - -// Formats a GUID in a given given buffer. -// Note: Does not exist. If it's necessary, please use/refactor WszCATFormatSortID() -// ERR ErrOSStrCbFormatGuid( _Out_writes_bytes_(37*2) PWSTR szBuffer, _In_ const GUID* pguid ); - +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. 
+LONG LOSStrCompareA( + _In_ PCSTR const pszStr1, + _In_ PCSTR const pszStr2, + _In_ const ULONG cchMax = -1 ); +LONG LOSStrCompareW( + _In_ PCWSTR const pwszStr1, + _In_ PCWSTR const pwszStr2, + _In_ const ULONG cchMax = -1 ); + +// +// create a formatted string in a given buffer and a va_list +ERR __cdecl ErrOSStrCbVFormatA( + _Out_writes_bytes_(cbBuffer) PSTR szBuffer, + SIZE_T cbBuffer, + _Printf_format_string_ PCSTR szFormat, + va_list alist ); +ERR __cdecl ErrOSStrCbVFormatW( + _Out_writes_bytes_(cbBuffer) PWSTR szBuffer, + SIZE_T cbBuffer, + _Printf_format_string_ PCWSTR szFormat, + va_list alist ); +#define OSStrCbVFormatA( szBuffer, cbBuffer, szFormat, alist) \ + { if ( ErrOSStrCbVFormatA( szBuffer, cbBuffer, szFormat, alist ) ){ AssertSz( fFalse, "Success expected" ); } } +#define OSStrCbVFormatW( szBuffer, cbBuffer, szFormat, alist) \ + { if ( ErrOSStrCbVFormatW( szBuffer, cbBuffer, szFormat, alist ) ){ AssertSz( fFalse, "Success expected" ); } } + +// +// create a formatted string in a given buffer with a variadac parameter list +ERR __cdecl ErrOSStrCbFormatA( + _Out_writes_bytes_(cbBuffer) PSTR szBuffer, + SIZE_T cbBuffer, + _Printf_format_string_ PCSTR szFormat, + ...); +ERR __cdecl ErrOSStrCbFormatW ( + _Out_writes_bytes_(cbBuffer) PWSTR szBuffer, + SIZE_T cbBuffer, + _Printf_format_string_ PCWSTR szFormat, + ...); +#define OSStrCbFormatA( szBuffer, cbBuffer, szFormat, ... ) \ + { if ( ErrOSStrCbFormatA( szBuffer, cbBuffer, szFormat, __VA_ARGS__ ) ){ AssertSz( fFalse, "Success expected" ); } } +#define OSStrCbFormatW( szBuffer, cbBuffer, szFormat, ... ) \ + { if ( ErrOSStrCbFormatW( szBuffer, cbBuffer, szFormat, __VA_ARGS__ ) ){ AssertSz( fFalse, "Success expected" ); } } + +// // returns a pointer to the next character in the string. when no more // characters are left, the given ptr is returned. 
- -VOID OSStrCharFindA( _In_ PCSTR const szStr, const char ch, _Outptr_result_maybenull_ PSTR * const pszFound ); -VOID OSStrCharFindW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr_result_maybenull_ PWSTR * const pwszFound ); +VOID OSStrCharFindA( + _In_ PCSTR const szStr, + const CHAR ch, + _Outptr_result_maybenull_ PSTR * const pszFound ); +VOID OSStrCharFindW( + _In_ PCWSTR const wszStr, + const WCHAR wch, + _Outptr_result_maybenull_ PWSTR * const pwszFound ); // find the last occurrence of the given character in the given string and // return a pointer to that character. NULL is returned when the character // is not found. - -VOID OSStrCharFindReverseA( _In_ PCSTR const szStr, const char ch, _Outptr_result_maybenull_ PSTR * const pszFound ); -VOID OSStrCharFindReverseW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr_result_maybenull_ PWSTR * const pwszFound ); +VOID OSStrCharFindReverseA( + _In_ PCSTR const szStr, + const CHAR ch, + _Outptr_result_maybenull_ PSTR * const pszFound ); +VOID OSStrCharFindReverseW( + _In_ PCWSTR const wszStr, + const WCHAR wch, + _Outptr_result_maybenull_ PWSTR * const pwszFound ); +// // check for a trailing path-delimeter - BOOL FOSSTRTrailingPathDelimiterA( _In_ PCSTR const pszPath ); BOOL FOSSTRTrailingPathDelimiterW( _In_ PCWSTR const pwszPath ); +// // convert with a fixed conversion code page (1252 / Windows English) or use a context dependant // conversion (CP_ACP). - typedef enum { // Should be used when the same conversion should be used @@ -106,66 +148,56 @@ typedef enum // should only be in ASCII). OSSTR_FIXED_CONVERSION = 0, + // Should be used when the OS locale and setting should be // considered (e.g.: customer data). 
OSSTR_CONTEXT_DEPENDENT_CONVERSION = 1, -} OSSTR_CONVERSION; -// convert a byte string to a wide-char string +} OSSTR_CONVERSION; -ERR ErrOSSTRAsciiToUnicode( _In_ PCSTR const pszIn, - _Out_opt_z_cap_post_count_(cwchOut, *pcwchRequired) PWSTR const pwszOut, - const size_t cwchOut, - size_t * const pcwchRequired = NULL, - const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION ); +typedef enum +{ + OSSTR_NOT_LOSSY = 0, + OSSTR_ALLOW_LOSSY = 1 +} OSSTR_LOSSY; +// +// convert a byte string to a wide-char string +ERR ErrOSSTRAsciiToUnicode( + _In_ PCSTR const pszIn, + _Out_opt_z_cap_post_count_(cwchOut, *pcwchRequired) PWSTR const pwszOut, + const SIZE_T cwchOut, + SIZE_T * const pcwchRequired = NULL, + const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION + ); + +// // convert a wide-char string to a byte string -typedef enum { OSSTR_NOT_LOSSY = 0, OSSTR_ALLOW_LOSSY = 1 } OSSTR_LOSSY; - -ERR ErrOSSTRUnicodeToAscii( _In_ PCWSTR const pwszIn, - _Out_opt_z_cap_post_count_(cchOut, *pcchRequired) PSTR const pwszOut, - const size_t cchOut, - size_t * const pcchRequired = NULL, - const OSSTR_LOSSY fLossy = OSSTR_NOT_LOSSY, - const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION ); - -#ifdef __TCHAR_DEFINED -// convert a WCHAR string to a _TCHAR string - -ERR ErrOSSTRUnicodeToTchar( const wchar_t *const pwszIn, - __out_ecount(ctchOut) _TCHAR *const ptszOut, - const INT ctchOut, - const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION ); -#endif - -// generic names for UNICODE and non-UNICODE cases - -#ifdef UNICODE - -#define LOSStrLength LOSStrLengthW -#define OSStrAppend OSSTRAppendW -#define LOSSTRCompare LOSSTRCompareW -// #define OSStrFormat OSStrFormatW -//#define OSStrCharFind OSStrCharFindW -//#define OSStrCharFindReverse OSStrCharFindReverseW -#define FOSSTRTrailingPathDelimiter FOSSTRTrailingPathDelimiterW - -#else // !UNICODE - -#define LOSStrLength LOSStrLengthA -#define 
OSStrAppend OSSTRAppendA -#define LOSSTRCompare LOSSTRCompareA -//#define OSStrFormat OSStrFormatA -//#define OSStrCharFind OSStrCharFindA -//#define OSStrCharFindReverse OSStrCharFindReverseA -#define FOSSTRTrailingPathDelimiter FOSSTRTrailingPathDelimiterA - -#endif // UNICODE - - -ERR ErrOSSTRAsciiToUnicodeM( _In_ PCSTR const szzMultiIn, __out_ecount_z(cchMax) WCHAR * wszNew, ULONG cchMax, size_t * const pcchActual, const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION ); -ERR ErrOSSTRUnicodeToAsciiM( _In_ PCWSTR const wszzMultiIn, __out_ecount_z(cchMax) char * szNew, ULONG cchMax, size_t * const pcchActual, const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION ); +ERR ErrOSSTRUnicodeToAscii( + _In_ PCWSTR const pwszIn, + _Out_opt_z_cap_post_count_(cchOut, *pcchRequired) PSTR const pwszOut, + const SIZE_T cchOut, + SIZE_T * const pcchRequired = NULL, + const OSSTR_LOSSY fLossy = OSSTR_NOT_LOSSY, + const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION + ); + +ERR ErrOSSTRAsciiToUnicodeM( + _In_ PCSTR const szzMultiIn, + __out_ecount_z(cchMax) WCHAR * wszNew, + ULONG cchMax, + SIZE_T * const pcchActual, + const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION + ); + +ERR ErrOSSTRUnicodeToAsciiM( + _In_ PCWSTR const wszzMultiIn, + __out_ecount_z(cchMax) CHAR * szNew, + ULONG cchMax, + SIZE_T * const pcchActual, + const OSSTR_CONVERSION osstrConversion = OSSTR_CONTEXT_DEPENDENT_CONVERSION + ); #endif // __OS_STRING_HXX_INCLUDED diff --git a/dev/ese/published/inc/os/thread.hxx b/dev/ese/published/inc/os/thread.hxx index 1e28587d..1d4ab6df 100644 --- a/dev/ese/published/inc/os/thread.hxx +++ b/dev/ese/published/inc/os/thread.hxx @@ -42,9 +42,16 @@ enum EThreadPriority // creates a thread with the specified attributes -const ERR ErrUtilThreadICreate( const PUTIL_THREAD_PROC pfnStart, const DWORD cbStack, const EThreadPriority priority, THREAD* const pThread, const DWORD_PTR dwParam, 
const _TCHAR* const szStart ); +const ERR ErrUtilThreadICreate( + const PUTIL_THREAD_PROC pfnStart, + const DWORD cbStack, + const EThreadPriority priority, + THREAD* const pThread, + const DWORD_PTR dwParam, + const CHAR* const szStart ); + #define ErrUtilThreadCreate( pfnStart, cbStack, priority, phThread, dwParam ) \ - ( ErrUtilThreadICreate( pfnStart, cbStack, priority, phThread, dwParam, _T( #pfnStart ) ) ) + ( ErrUtilThreadICreate( pfnStart, cbStack, priority, phThread, dwParam, #pfnStart ) ) // waits for the specified thread to exit and returns its return value diff --git a/dev/ese/published/inc/os/trace.hxx b/dev/ese/published/inc/os/trace.hxx index 748f7dfe..9cdc6b25 100644 --- a/dev/ese/published/inc/os/trace.hxx +++ b/dev/ese/published/inc/os/trace.hxx @@ -6,6 +6,8 @@ // Required for std::move() #include +// Required for wprintf +#include class TRACEINFO { diff --git a/dev/ese/src/_esefile/esefile.hxx b/dev/ese/src/_esefile/esefile.hxx index 94214b81..a5ac9320 100644 --- a/dev/ese/src/_esefile/esefile.hxx +++ b/dev/ese/src/_esefile/esefile.hxx @@ -7,13 +7,6 @@ #include "cc.hxx" #include "math.hxx" -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) #include "os.hxx" #include "jet.h" diff --git a/dev/ese/src/_perfctrs/perfdata.pl b/dev/ese/src/_perfctrs/perfdata.pl index e117e6a7..fa08c2f8 100644 --- a/dev/ese/src/_perfctrs/perfdata.pl +++ b/dev/ese/src/_perfctrs/perfdata.pl @@ -376,13 +376,6 @@ #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma 
prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) #include #include @@ -458,8 +451,7 @@ #else #include -#include - + #include "perfmon.hxx" #pragma pack(4) diff --git a/dev/ese/src/_xpress10/xpress10corsica.cxx b/dev/ese/src/_xpress10/xpress10corsica.cxx index 9f726acf..15115b6b 100644 --- a/dev/ese/src/_xpress10/xpress10corsica.cxx +++ b/dev/ese/src/_xpress10/xpress10corsica.cxx @@ -14,7 +14,7 @@ #pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") #pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") #pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include +//TAW_TODO #include #pragma prefast(pop) #ifndef WIN32_LEAN_AND_MEAN diff --git a/dev/ese/src/_xpress10/xpress10sw.cxx b/dev/ese/src/_xpress10/xpress10sw.cxx index cc3a909f..ee418f98 100644 --- a/dev/ese/src/_xpress10/xpress10sw.cxx +++ b/dev/ese/src/_xpress10/xpress10sw.cxx @@ -14,7 +14,7 @@ #pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") #pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") #pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include +//#include #pragma prefast(pop) #ifndef WIN32_LEAN_AND_MEAN diff --git a/dev/ese/src/checksum/checksumstd.hxx b/dev/ese/src/checksum/checksumstd.hxx index d215b473..639aade3 100644 --- a/dev/ese/src/checksum/checksumstd.hxx +++ b/dev/ese/src/checksum/checksumstd.hxx @@ -5,13 +5,6 @@ #include #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother 
us with tchar, someone else owns that.") -#include -#pragma prefast(pop) #include #include #include diff --git a/dev/ese/src/ese/_log/logdump.cxx b/dev/ese/src/ese/_log/logdump.cxx index 16d6c632..9ad31064 100644 --- a/dev/ese/src/ese/_log/logdump.cxx +++ b/dev/ese/src/ese/_log/logdump.cxx @@ -537,13 +537,13 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON const ULONG cchLogPath = LOSStrLengthW( wszLog ); const WCHAR * wszLogExt = NULL; ULONG cLogDigits = 0; - CWPRINTFFILE cpfCsvOut( wszCsvDataFile ); + CPRINTFFILE cpfCsvOutW( wszCsvDataFile, CPRINTFFILE::FILEENCODING::UTF16 ); DIRLOGGENERATIONINFO dirloginfo = { 0 }; LONG lgenStartSanitised = 0; LONG lgenEndSanitised = 0; logdumpOp.m_opts = 0; - Assert( logdumpOp.m_pcwpfCsvOut == NULL ); + Assert( logdumpOp.m_pcpfCsvOutW == NULL ); if ( cchLogPath >= IFileSystemAPI::cchPathMax ) { @@ -570,14 +570,14 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON if ( grbit & JET_bitDBUtilOptionDumpLogInfoCSV ) { Assert( wszCsvDataFile ); // otherwise we won't be printing to any file. - logdumpOp.m_pcwpfCsvOut = &cpfCsvOut; + logdumpOp.m_pcpfCsvOutW = &cpfCsvOutW; DUMPPrintF( "\n Csv file: %ws\n", wszCsvDataFile ); - if (JET_errSuccess != logdumpOp.m_pcwpfCsvOut->m_errLast) + if (JET_errSuccess != logdumpOp.m_pcpfCsvOutW->m_errLast) { DUMPPrintF( "\n Cannot open csv file (%ws). 
Error %d.\n", - wszCsvDataFile, logdumpOp.m_pcwpfCsvOut->m_errLast); - Call( ErrERRCheck( logdumpOp.m_pcwpfCsvOut->m_errLast ) ); + wszCsvDataFile, logdumpOp.m_pcpfCsvOutW->m_errLast); + Call( ErrERRCheck( logdumpOp.m_pcpfCsvOutW->m_errLast ) ); } } else if ( grbit & JET_bitDBUtilOptionDumpVerboseLevel1 ) @@ -760,12 +760,12 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON // for checksum mode, eseutil will generate the whitespace on error } - if (logdumpOp.m_pcwpfCsvOut - && (JET_errSuccess != logdumpOp.m_pcwpfCsvOut->m_errLast)) + if (logdumpOp.m_pcpfCsvOutW + && (JET_errSuccess != logdumpOp.m_pcpfCsvOutW->m_errLast)) { DUMPPrintF( "\n Cannot write csv file (%ws). Error %d.\n", - wszCsvDataFile, logdumpOp.m_pcwpfCsvOut->m_errLast); - Call( ErrERRCheck( logdumpOp.m_pcwpfCsvOut->m_errLast ) ); + wszCsvDataFile, logdumpOp.m_pcpfCsvOutW->m_errLast); + Call( ErrERRCheck( logdumpOp.m_pcpfCsvOutW->m_errLast ) ); } } else @@ -789,7 +789,7 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON else if ( grbit & JET_bitDBUtilOptionDumpLogInfoCSV ) { Assert( wszCsvDataFile ); // otherwise we won't be printing to any file. - logdumpOp.m_pcwpfCsvOut = &cpfCsvOut; + logdumpOp.m_pcpfCsvOutW = &cpfCsvOutW; } else { @@ -822,14 +822,14 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON { DUMPPrintF( " Base name: %ws\n", SzParam( pinst, JET_paramBaseName ) ); DUMPPrintF( " Log file: %ws", wszLog ); - if (logdumpOp.m_pcwpfCsvOut) + if (logdumpOp.m_pcpfCsvOutW) { DUMPPrintF( "\n Csv file: %ws", wszCsvDataFile ); - if (JET_errSuccess != logdumpOp.m_pcwpfCsvOut->m_errLast) + if (JET_errSuccess != logdumpOp.m_pcpfCsvOutW->m_errLast) { DUMPPrintF( "\n Cannot open csv file (%ws). 
Error %d.\n", - wszCsvDataFile, logdumpOp.m_pcwpfCsvOut->m_errLast); - Call( ErrERRCheck( logdumpOp.m_pcwpfCsvOut->m_errLast ) ); + wszCsvDataFile, logdumpOp.m_pcpfCsvOutW->m_errLast); + Call( ErrERRCheck( logdumpOp.m_pcpfCsvOutW->m_errLast ) ); } } } @@ -838,12 +838,12 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON if ( !logdumpOp.m_fVerifyOnly ) { - if (logdumpOp.m_pcwpfCsvOut - && (JET_errSuccess != logdumpOp.m_pcwpfCsvOut->m_errLast)) + if (logdumpOp.m_pcpfCsvOutW + && (JET_errSuccess != logdumpOp.m_pcpfCsvOutW->m_errLast)) { DUMPPrintF( "\n Cannot write csv file (%ws). Error %d.\n", - wszCsvDataFile, logdumpOp.m_pcwpfCsvOut->m_errLast); - Call( ErrERRCheck( logdumpOp.m_pcwpfCsvOut->m_errLast ) ); + wszCsvDataFile, logdumpOp.m_pcpfCsvOutW->m_errLast); + Call( ErrERRCheck( logdumpOp.m_pcpfCsvOutW->m_errLast ) ); } DUMPPrintF( "\n" ); } @@ -854,7 +854,7 @@ ERR ErrDUMPLog( INST *pinst, _In_ PCWSTR wszLog, const LONG lgenStart, const LON } HandleError: - logdumpOp.m_pcwpfCsvOut = NULL; // stack var will be freed ... + logdumpOp.m_pcpfCsvOutW = NULL; // stack var will be freed ... if ( NULL != plgfilehdr ) { OSMemoryPageFree( plgfilehdr ); @@ -884,7 +884,7 @@ ERR LOG::ErrLGIDumpOneAttachment( const ATTACHINFO * const pattachinfo, const LO Call( wszDbName.ErrSet( (CHAR*)(pattachinfo->szNames) ) ); } - if ( NULL == plogdumpOp->m_pcwpfCsvOut ) + if ( NULL == plogdumpOp->m_pcpfCsvOutW ) { DUMPPrintF( " %d %ws%ws\n", pattachinfo->Dbid(), (WCHAR *)wszDbName, pattachinfo->FSparseEnabledFile() ? 
L" (sparse)" : L"" ); @@ -900,7 +900,7 @@ ERR LOG::ErrLGIDumpOneAttachment( const ATTACHINFO * const pattachinfo, const LO } const size_t cchHexDumped = 3 * sizeof( pattachinfo->signDb ); rgwchSignBuf[ cchHexDumped ] = L'\0'; - (*(plogdumpOp->m_pcwpfCsvOut))( L"%s, %d, \"%s\", %s\r\n", wszLogHeaderAttachInfo, pattachinfo->Dbid(), (WCHAR *)wszDbName, rgwchSignBuf ); + (*(plogdumpOp->m_pcpfCsvOutW))( L"%s, %d, \"%s\", %s\r\n", wszLogHeaderAttachInfo, pattachinfo->Dbid(), (WCHAR *)wszDbName, rgwchSignBuf ); } HandleError: @@ -1201,7 +1201,7 @@ ERR LOG::ErrLGDumpLog( IFileAPI *const pfapi, LOGDUMP_OP * const plogdumpOp, LGF WCHAR const * wszLogHeaderGeneralInfo = L"LHGI"; - if( plogdumpOp->m_pcwpfCsvOut ) + if( plogdumpOp->m_pcpfCsvOutW ) { CHAR szLogSig[128]; // plenty of space WCHAR wszLogCreate[128]; // plenty of space @@ -1228,7 +1228,7 @@ ERR LOG::ErrLGDumpLog( IFileAPI *const pfapi, LOGDUMP_OP * const plogdumpOp, LGF (SHORT) m_pLogStream->GetCurrentFileHdr()->lgfilehdr.tmPrevGen.bSeconds, (SHORT) m_pLogStream->GetCurrentFileHdr()->lgfilehdr.tmPrevGen.Milliseconds()); - (*(plogdumpOp->m_pcwpfCsvOut))(L"%ws, %hs, %08.08X, %08.08X, %ws, %ws, %d.%d.%d.%d, %ws, %d\r\n", + (*(plogdumpOp->m_pcpfCsvOutW))(L"%ws, %hs, %08.08X, %08.08X, %ws, %ws, %d.%d.%d.%d, %ws, %d\r\n", wszLogHeaderGeneralInfo, szLogSig, (ULONG)m_pLogStream->GetCurrentFileGen(), (ULONG)m_pLogStream->GetCurrentFileHdr()->lgfilehdr.le_ulChecksum, wszLogCreate, wszPrevLogCreate, @@ -1303,7 +1303,7 @@ ERR LOG::ErrLGDumpLog( IFileAPI *const pfapi, LOGDUMP_OP * const plogdumpOp, LGF (USHORT)m_pLogStream->GetCurrentFileHdr()->lgfilehdr.le_lgposCheckpoint.le_ib ); } - if ( fPrint || plogdumpOp->m_pcwpfCsvOut ) + if ( fPrint || plogdumpOp->m_pcpfCsvOutW ) { err = ErrLGIDumpAttachments( plogdumpOp ); if ( err < 0 ) @@ -1432,13 +1432,13 @@ ERR LOG::ErrLGDumpLog( IFileAPI *const pfapi, LOGDUMP_OP * const plogdumpOp, LGF } } - if( plogdumpOp->m_pcwpfCsvOut ) + if( plogdumpOp->m_pcpfCsvOutW ) { LGPOS lgpos; 
m_pLogReadBuffer->GetLgposOfPbNext(&lgpos); Assert( lgpos.lGeneration == LONG(m_pLogStream->GetCurrentFileGen()) ); lgpos.lGeneration = LONG(m_pLogStream->GetCurrentFileGen()); - Call( ErrLrToLogCsvSimple( plogdumpOp->m_pcwpfCsvOut, lgpos, plr, this ) ); + Call( ErrLrToLogCsvSimple( plogdumpOp->m_pcpfCsvOutW, lgpos, plr, this ) ); } logRecPosCurr++; @@ -1453,17 +1453,17 @@ ERR LOG::ErrLGDumpLog( IFileAPI *const pfapi, LOGDUMP_OP * const plogdumpOp, LGF Call( err ); CallS( err ); - if (plogdumpOp->m_pcwpfCsvOut) + if (plogdumpOp->m_pcpfCsvOutW) { // SOMEONE doesn't want the added dev/test cost of parsing LTCL along with LTEL and . // So, a csv dump will end with LTEL (good log) or (bad log or problems with csv file). // SOMEONE believes LTCL is needed because the corruption may have occcured after // the DB was updated with info past the corruption in the log. But SOMEONE's current design // doesn't need this refinement and so I am disabling LTCL at his request. - // (*plogdumpOp->m_pcwpfCsvOut)((fCorrupt) ? szLogTrailerCorruptLog : szLogTrailerEndOfLog); + // (*plogdumpOp->m_pcpfCsvOutW)((fCorrupt) ? 
szLogTrailerCorruptLog : szLogTrailerEndOfLog); if (!fCorrupt) { - (*plogdumpOp->m_pcwpfCsvOut)(szLogTrailerEndOfLog); + (*plogdumpOp->m_pcpfCsvOutW)(szLogTrailerEndOfLog); } } // verbose dump diff --git a/dev/ese/src/ese/_log/logread_legacy.cxx b/dev/ese/src/ese/_log/logread_legacy.cxx index e8f1b931..7989bec7 100644 --- a/dev/ese/src/ese/_log/logread_legacy.cxx +++ b/dev/ese/src/ese/_log/logread_legacy.cxx @@ -528,7 +528,7 @@ INLINE BOOL FLGILogPatchDate( const WCHAR* wszPath, CPRINTFFILE **const ppcprint // allocate a new trace file object - pcprintf = new CPRINTFFILE( wszPath ); + pcprintf = new CPRINTFFILE( wszPath, CPRINTFFILE::FILEENCODING::ASCII ); if ( !pcprintf ) { return fFalse; @@ -840,7 +840,7 @@ ERR LOG_READ_BUFFER::ErrLGCheckReadLastLogRecordFF_Legacy( ULONG ulChecksumExpected; ULONG ulChecksumActual; #ifdef ENABLE_LOGPATCH_TRACE - CPRINTFFILE *pcprintfLogPatch = NULL; + CPRINTFFILE * pcprintfLogPatch = NULL; WCHAR wszLogPatchPath[ IFileSystemAPI::cchPathMax ]; #endif // ENABLE_LOGPATCH_TRACE // diff --git a/dev/ese/src/ese/_log/logstream.cxx b/dev/ese/src/ese/_log/logstream.cxx index e187f46d..1989d5c9 100644 --- a/dev/ese/src/ese/_log/logstream.cxx +++ b/dev/ese/src/ese/_log/logstream.cxx @@ -50,7 +50,7 @@ LOG_STREAM::LOG_STREAM( INST * pinst, LOG * pLog ) m_fCreateAsynchZeroFilled( fFalse ), m_errCreateAsynch( JET_errSuccess ), // Asynchronous log file creation - m_asigCreateAsynchIOCompleted( CSyncBasicInfo( _T( "LOG::m_asigCreateAsynchIOCompleted" ) ) ), + m_asigCreateAsynchIOCompleted( CSyncBasicInfo( "LOG::m_asigCreateAsynchIOCompleted" ) ), m_critCreateAsynchIOExecuting( CLockBasicInfo( CSyncBasicInfo( "LOG::m_critCreateAsynchIOExecuting" ), rankAsynchIOExecuting, 0 ) ), m_lgposCreateAsynchTrigger( lgposMax ), m_critJetTmpLog( CLockBasicInfo( CSyncBasicInfo( szJetTmpLog ), rankJetTmpLog, 0 ) ), diff --git a/dev/ese/src/ese/_log/logwrite.cxx b/dev/ese/src/ese/_log/logwrite.cxx index c68f7c11..41c277e7 100644 --- 
a/dev/ese/src/ese/_log/logwrite.cxx +++ b/dev/ese/src/ese/_log/logwrite.cxx @@ -162,9 +162,9 @@ LOG_WRITE_BUFFER::LOG_WRITE_BUFFER( INST * pinst, LOG * pLog, ILogStream * pLogS // we always start writing to a new sector, so we never have a shadow sector to start with m_fHaveShadow( fFalse ), m_sigLogPaused( CSyncBasicInfo( "LOG_WRITE_BUFFER::sigLogPaused" ) ), - m_semLogSignal( CSyncBasicInfo( _T( "LOG::m_semLogSignal" ) ) ), - m_semLogWrite( CSyncBasicInfo( _T( "LOG::m_semLogWrite" ) ) ), - m_semWaitForLogBufferSpace( CSyncBasicInfo( _T( "LOG::m_semWaitForLogBufferSpace" ) ) ), + m_semLogSignal( CSyncBasicInfo( "LOG::m_semLogSignal" ) ), + m_semLogWrite( CSyncBasicInfo( "LOG::m_semLogWrite" ) ), + m_semWaitForLogBufferSpace( CSyncBasicInfo( "LOG::m_semWaitForLogBufferSpace" ) ), m_critLGWaitQ( CLockBasicInfo( CSyncBasicInfo( szLGWaitQ ), rankLGWaitQ, 0 ) ), m_tickNextLazyCommit( 0 ), m_lgposNextLazyCommit( lgposMin ), diff --git a/dev/ese/src/ese/_osu/checksumu.cxx b/dev/ese/src/ese/_osu/checksumu.cxx index d821aa77..a0a717bc 100644 --- a/dev/ese/src/ese/_osu/checksumu.cxx +++ b/dev/ese/src/ese/_osu/checksumu.cxx @@ -50,8 +50,6 @@ deal with that. 
#include "osustd.hxx" #include "esestd.hxx" - - // ================================================================ XECHECKSUM LongChecksumFromShortChecksum( const ULONG xorChecksum, const ULONG pgno ) // ================================================================ @@ -549,13 +547,13 @@ void DumpLargePageChecksumInfo( { PAGECHECKSUM checksumStoredInHeader = ChecksumFromPage( pv, cb, pagetype ); PAGECHECKSUM checksumComputedOffData = ComputePageChecksum( pv, cb, pagetype, pgno ); - (*pcprintf)( _T( "HEADER checksum = 0x%016I64X:0x%016I64X:0x%016I64X:0x%016I64X\n" ), + (*pcprintf)( "HEADER checksum = 0x%016I64X:0x%016I64X:0x%016I64X:0x%016I64X\n", checksumStoredInHeader.rgChecksum[ 0 ], checksumStoredInHeader.rgChecksum[ 1 ], checksumStoredInHeader.rgChecksum[ 2 ], checksumStoredInHeader.rgChecksum[ 3 ] ); if( checksumStoredInHeader != checksumComputedOffData ) { - (*pcprintf)( _T( "****** checksum mismatch ******\n" ) ); - (*pcprintf)( _T( "COMPUTED checksum = 0x%016I64X:0x%016I64X:0x%016I64X:0x%016I64X\n" ), + (*pcprintf)( "****** checksum mismatch ******\n" ); + (*pcprintf)( "COMPUTED checksum = 0x%016I64X:0x%016I64X:0x%016I64X:0x%016I64X\n", checksumComputedOffData.rgChecksum[ 0 ], checksumComputedOffData.rgChecksum[ 1 ], checksumComputedOffData.rgChecksum[ 2 ], checksumComputedOffData.rgChecksum[ 3 ] ); BOOL fCorrectableError = fFalse; @@ -564,15 +562,15 @@ void DumpLargePageChecksumInfo( if ( !fCorrectableError ) { - (*pcprintf)( _T( "error is NOT correctable by the checksum\n" ) ); + (*pcprintf)( "error is NOT correctable by the checksum\n" ); } else { Assert( 0 <= ibitCorrupted && ( unsigned )ibitCorrupted <= CHAR_BIT * cb ); - (*pcprintf)( _T( "a bit at offset %d (0x%X) was corrupted and can be corrected by the checksum\n" ), ibitCorrupted, ibitCorrupted ); + (*pcprintf)( "a bit at offset %d (0x%X) was corrupted and can be corrected by the checksum\n", ibitCorrupted, ibitCorrupted ); const PAGECHECKSUM checksumFixed = ComputePageChecksum( pv, cb, 
pagetype, pgno ); - (*pcprintf)( _T( "FIXED checksum = 0x%016I64X:0x%016I64X:0x%016I64X:0x%016I64X\n" ), + (*pcprintf)( "FIXED checksum = 0x%016I64X:0x%016I64X:0x%016I64X:0x%016I64X\n", checksumFixed.rgChecksum[ 0 ], checksumFixed.rgChecksum[ 1 ], checksumFixed.rgChecksum[ 2 ], checksumFixed.rgChecksum[ 3 ] ); Assert( checksumFixed == checksumStoredInHeader ); } @@ -580,7 +578,7 @@ void DumpLargePageChecksumInfo( } EXCEPT( efaExecuteHandler ) { - (*pcprintf)( _T( "\t\n" ) ); + (*pcprintf)( "\t\n" ); } ENDEXCEPT } @@ -607,56 +605,56 @@ void DumpPageChecksumInfo( const BOOL fNewChecksumFormat = FPageHasNewChecksumFormat( pv, pagetype ); const BOOL fBadChecksum = ( checksumStoredInHeader != checksumComputedOffData ); - (*pcprintf)( _T( "\theader checksum = 0x%016I64x\n" ), checksumStoredInHeader.rgChecksum[ 0 ] ); + (*pcprintf)( "\theader checksum = 0x%016I64x\n", checksumStoredInHeader.rgChecksum[ 0 ] ); if( fBadChecksum ) { - (*pcprintf)( _T( "\t****** checksum mismatch ******\n" ) ); - (*pcprintf)( _T( "\tcomputed checksum = 0x%016I64x\n" ), checksumComputedOffData ); + (*pcprintf)( "\t****** checksum mismatch ******\n" ); + (*pcprintf)( "\tcomputed checksum = 0x%016I64x\n", checksumComputedOffData ); } if( !fNewChecksumFormat ) { - (*pcprintf)( _T( "\t\told checksum format\n" ) ); + (*pcprintf)( "\t\told checksum format\n" ); const ULONG * pdw = (const ULONG * ) pv; const ULONG pgnoFromPage = pdw[1]; - (*pcprintf)( _T( "\t\t\tpgno = %d\n" ), pgnoFromPage ); + (*pcprintf)( "\t\t\tpgno = %d\n", pgnoFromPage ); } else { - (*pcprintf)( _T( "\t\tnew checksum format\n" ) ); + (*pcprintf)( "\t\tnew checksum format\n" ); const ULONG eccChecksumComputed = DwECCChecksumFromXEChecksum( checksumComputedOffData.rgChecksum[ 0 ] ); const ULONG xorChecksumComputed = DwXORChecksumFromXEChecksum( checksumComputedOffData.rgChecksum[ 0 ] ); const ULONG eccChecksumHeader = DwECCChecksumFromXEChecksum( checksumStoredInHeader.rgChecksum[ 0 ] ); const ULONG xorChecksumHeader = 
DwXORChecksumFromXEChecksum( checksumStoredInHeader.rgChecksum[ 0 ] ); - (*pcprintf)( _T( "\t\t\theader ECC checksum = 0x%08x\n" ), eccChecksumHeader ); + (*pcprintf)( "\t\t\theader ECC checksum = 0x%08x\n", eccChecksumHeader ); if( fBadChecksum ) { - (*pcprintf)( _T( "\t\t\tcomputed ECC checksum = 0x%08x\n" ), eccChecksumComputed ); + (*pcprintf)( "\t\t\tcomputed ECC checksum = 0x%08x\n", eccChecksumComputed ); } - (*pcprintf)( _T( "\t\t\theader XOR checksum = 0x%08x\n" ), xorChecksumHeader ); + (*pcprintf)( "\t\t\theader XOR checksum = 0x%08x\n", xorChecksumHeader ); if( fBadChecksum ) { - (*pcprintf)( _T( "\t\t\tcomputed XOR checksum = 0x%08x\n" ), xorChecksumComputed ); + (*pcprintf)( "\t\t\tcomputed XOR checksum = 0x%08x\n", xorChecksumComputed ); } if( fBadChecksum ) { if( !FECCErrorIsCorrectable( cb, checksumStoredInHeader.rgChecksum[ 0 ], checksumComputedOffData.rgChecksum[ 0 ] ) ) { - (*pcprintf)( _T( "\tchecksum error is NOT correctable\n" ) ); + (*pcprintf)( "\tchecksum error is NOT correctable\n" ); if( eccChecksumComputed == eccChecksumHeader ) { const ULONG pgnoPossible = xorChecksumComputed ^ xorChecksumHeader ^ pgno; - (*pcprintf)( _T( "\tECC checksums match. perhaps this is actually page %d?\n" ), pgnoPossible ); + (*pcprintf)( "\tECC checksums match. perhaps this is actually page %d?\n", pgnoPossible ); } } else { - (*pcprintf)( _T( "\tchecksum error is correctable\n" ) ); + (*pcprintf)( "\tchecksum error is correctable\n" ); const UINT ibitCorrupted = IbitCorrupted( cb, checksumStoredInHeader.rgChecksum[ 0 ], checksumComputedOffData.rgChecksum[ 0 ] ); - (*pcprintf)( _T( "\t\tbit %d is corrupted\n" ), ibitCorrupted ); + (*pcprintf)( "\t\tbit %d is corrupted\n", ibitCorrupted ); if( IbitNewChecksumFormatFlag( pagetype ) == ibitCorrupted ) { - (*pcprintf)( _T( "\t\tbit %d is the checksum format flag! this corruption is not fixable\n" ), ibitCorrupted ); + (*pcprintf)( "\t\tbit %d is the checksum format flag! 
this corruption is not fixable\n", ibitCorrupted ); } else { @@ -664,16 +662,16 @@ void DumpPageChecksumInfo( const PAGECHECKSUM checksumFixed = ComputePageChecksum( pv, cb, pagetype, pgno ); const ULONG eccChecksumFixed = DwECCChecksumFromXEChecksum( checksumFixed.rgChecksum[ 0 ] ); const ULONG xorChecksumFixed = DwXORChecksumFromXEChecksum( checksumFixed.rgChecksum[ 0 ] ); - (*pcprintf)( _T( "\t\tfixed checksum is 0x%016I64x\n" ), checksumFixed ); - (*pcprintf)( _T( "\t\tfixed ECC checksum is 0x%08x\n" ), eccChecksumFixed ); - (*pcprintf)( _T( "\t\tfixed XOR checksum is 0x%08x\n" ), xorChecksumFixed ); + (*pcprintf)( "\t\tfixed checksum is 0x%016I64x\n", checksumFixed ); + (*pcprintf)( "\t\tfixed ECC checksum is 0x%08x\n", eccChecksumFixed ); + (*pcprintf)( "\t\tfixed XOR checksum is 0x%08x\n", xorChecksumFixed ); if( checksumFixed == checksumStoredInHeader ) { - (*pcprintf)( _T( "\t****** page corruption was fixed ******\n" ) ); + (*pcprintf)( "\t****** page corruption was fixed ******\n" ); } else { - (*pcprintf)( _T( "\t****** page corruption fix FAILED! ******\n" ) ); + (*pcprintf)( "\t****** page corruption fix FAILED! 
******\n" ); } } } @@ -682,7 +680,7 @@ void DumpPageChecksumInfo( } EXCEPT( efaExecuteHandler ) { - (*pcprintf)( _T( "\t\n" ) ); + (*pcprintf)( "\t\n" ); } ENDEXCEPT } diff --git a/dev/ese/src/ese/_osu/osustd.hxx b/dev/ese/src/ese/_osu/osustd.hxx index f05594b9..4ca2abb4 100644 --- a/dev/ese/src/ese/_osu/osustd.hxx +++ b/dev/ese/src/ese/_osu/osustd.hxx @@ -7,13 +7,6 @@ #include #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) #include #include #include diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index 491797e1..0e9a3fa8 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -6,7 +6,6 @@ - // Only Init requires the global cbPage, subsequent this point the buffer manager is page independent, and // supports multiple page sizes concurrently. 
// @@ -9769,7 +9768,7 @@ VOID CBFIssueList::NullifyDiskTiltFake( const IFMP ifmp ) } -CCriticalSection CBFIssueList::s_critSync( CLockBasicInfo( CSyncBasicInfo( _T( "CBFIssueList::s_critSync" ) ), rankBFIssueListSync, 0 ) ); +CCriticalSection CBFIssueList::s_critSync( CLockBasicInfo( CSyncBasicInfo( "CBFIssueList::s_critSync" ), rankBFIssueListSync, 0 ) ); CMeteredSection CBFIssueList::s_msSync; @@ -9794,7 +9793,7 @@ HMEMORY_NOTIFICATION g_pMemoryNotification = NULL; // Init / Term -LOCAL CBinaryLock g_blBFMaintScheduleCancel( CLockBasicInfo( CSyncBasicInfo( _T( "BFMaint Schedule/Cancel" ) ), rankBFMaintScheduleCancel, 0 ) ); +LOCAL CBinaryLock g_blBFMaintScheduleCancel( CLockBasicInfo( CSyncBasicInfo( "BFMaint Schedule/Cancel" ), rankBFMaintScheduleCancel, 0 ) ); LOCAL volatile BOOL g_fBFMaintInitialized = fFalse; ERR ErrBFIMaintInit() @@ -10166,8 +10165,8 @@ INLINE void BFISynchronicity( void ) // Avail Pool -CSemaphore g_semMaintAvailPoolRequestUrgent( CSyncBasicInfo( _T( "g_semMaintAvailPoolRequestUrgent" ) ) ); -CSemaphore g_semMaintAvailPoolRequest( CSyncBasicInfo( _T( "g_semMaintAvailPoolRequest" ) ) ); +CSemaphore g_semMaintAvailPoolRequestUrgent( CSyncBasicInfo( "g_semMaintAvailPoolRequestUrgent" ) ); +CSemaphore g_semMaintAvailPoolRequest( CSyncBasicInfo( "g_semMaintAvailPoolRequest" ) ); LONG_PTR cbfAvailPoolLow; LONG_PTR cbfAvailPoolHigh; @@ -10367,7 +10366,7 @@ void BFIMaintAvailPoolITask( void*, void* ) // Scavenging -CSemaphore g_semMaintScavenge( CSyncBasicInfo( _T( "g_semMaintScavenge" ) ) ); +CSemaphore g_semMaintScavenge( CSyncBasicInfo( "g_semMaintScavenge" ) ); // We track the last several runs @@ -11816,7 +11815,7 @@ void BFIMaintScavengeTerm( void ) // Checkpoint Depth -CSemaphore g_semMaintCheckpointDepthRequest( CSyncBasicInfo( _T( "g_semMaintCheckpointDepthRequest" ) ) ); +CSemaphore g_semMaintCheckpointDepthRequest( CSyncBasicInfo( "g_semMaintCheckpointDepthRequest" ) ); IFMP g_ifmpMaintCheckpointDepthStart; @@ -13381,7 +13380,7 @@ ERR 
ErrBFIMaintCheckpointDepthIFlushPagesByIFMP( const IFMP ifmp, BOOL * const p // Checkpoint -CSemaphore g_semMaintCheckpointRequest( CSyncBasicInfo( _T( "g_semMaintCheckpointRequest" ) ) ); +CSemaphore g_semMaintCheckpointRequest( CSyncBasicInfo( "g_semMaintCheckpointRequest" ) ); TICK g_tickMaintCheckpointLast; @@ -13915,9 +13914,9 @@ void BFIMaintHashedLatchesIRedistribute() // Cache Size -CSemaphore g_semMaintCacheStatsRequest( CSyncBasicInfo( _T( "g_semMaintCacheStatsRequest" ) ) ); +CSemaphore g_semMaintCacheStatsRequest( CSyncBasicInfo( "g_semMaintCacheStatsRequest" ) ); -CSemaphore g_semMaintCacheSize( CSyncBasicInfo( _T( "g_semMaintCacheSize" ) ) ); +CSemaphore g_semMaintCacheSize( CSyncBasicInfo( "g_semMaintCacheSize" ) ); LONG g_cMaintCacheSizePending = 0; inline ICBPage IcbBFIBufferSize( _In_ const INT cbSize ) @@ -15125,7 +15124,7 @@ TICK DtickBFIMaintCacheSizeDuration() // Idle Database -CSemaphore g_semMaintIdleDatabaseRequest( CSyncBasicInfo( _T( "g_semMaintIdleDatabaseRequest" ) ) ); +CSemaphore g_semMaintIdleDatabaseRequest( CSyncBasicInfo( "g_semMaintIdleDatabaseRequest" ) ); TICK g_tickMaintIdleDatabaseLast; diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index dc3a0757..91748af0 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -16902,7 +16902,7 @@ JETUNITTEST( CATMSysLocales, TestCLocaleNameInfoArrayWillWorkAsRequiredForMSysLo li.m_cIndices = 0x2; li.m_qwVersion = 0x34; li.m_sortID = sortID; - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"en-us" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"en-us" ); CLocaleNameInfoArray localesarray; CLocaleNameInfoArray::ERR err; @@ -16913,7 +16913,7 @@ JETUNITTEST( CATMSysLocales, TestCLocaleNameInfoArrayWillWorkAsRequiredForMSysLo err = localesarray.ErrSetEntry( localesarray.Size(), li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"pt-br" ); + 
OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-br" ); err = localesarray.ErrSetEntry( localesarray.Size(), li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); @@ -16921,7 +16921,7 @@ JETUNITTEST( CATMSysLocales, TestCLocaleNameInfoArrayWillWorkAsRequiredForMSysLo err = localesarray.ErrSetEntry( localesarray.Size(), li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"pt-pt" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-pt" ); err = localesarray.ErrSetEntry( localesarray.Size(), li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); @@ -16949,25 +16949,25 @@ JETUNITTEST( CATMSysLocales, TestCLocaleNameInfoArrayWillWorkAsRequiredForMSysLo // Search for all LocaleName + version combos we inserted ... ULONG i; - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"pt-br" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-br" ); li.m_qwVersion = 0x34; i = localesarray.SearchLinear( li, PfnCmpLocaleNameInfo ); CHECK( i == 1 ); - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"pt-br" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-br" ); li.m_qwVersion = 0x45; i = localesarray.SearchLinear( li, PfnCmpLocaleNameInfo ); CHECK( i == 2 ); - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"en-us" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"en-us" ); li.m_qwVersion = 0x34; i = localesarray.SearchLinear( li, PfnCmpLocaleNameInfo ); CHECK( i == 0 ); - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"pt-pt" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-pt" ); li.m_qwVersion = 0x45; i = localesarray.SearchLinear( li, PfnCmpLocaleNameInfo ); @@ -16996,7 +16996,7 @@ JETUNITTEST( CATMSysLocales, TestCLocaleNameInfoArrayWillWorkAsRequiredForMSysLo it = 
localesarray.SearchLinear( li, PfnCmpLocaleNameInfo ); CHECK( it == localesarray.iEntryNotFound ); - StringCchCopyW( li.m_wszLocaleName, _countof( li.m_wszLocaleName ), L"st-kg" ); + OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"st-kg" ); li.m_qwVersion = 0x34; it = localesarray.SearchLinear( li, PfnCmpLocaleNameInfo ); @@ -17013,7 +17013,8 @@ INLINE ERR ErrCATIParseLocaleNameInfo( { PCWSTR wszCurr; WCHAR wszSortID[PERSISTED_SORTID_MAX_LENGTH]; - ULONG cchLocaleName; + ULONG cbLocaleName; + ERR errT; // Not a return value, local only. Assert( ( NULL != wszLocaleName ) && ( NULL != pqwSortedVersion ) && ( NULL != psortID ) ); @@ -17032,14 +17033,14 @@ INLINE ERR ErrCATIParseLocaleNameInfo( { // Empty locale name is valid. wszCurr++; - cchLocaleName = 0; + cbLocaleName = 0; } else { wszCurr++; - cchLocaleName = wcscspn( wszCurr, L"," ); + cbLocaleName = wcscspn( wszCurr, L"," ) * sizeof(WCHAR); - if ( ( cchLocaleName == 0 ) || ( cchLocaleName >= NORM_LOCALE_NAME_MAX_LENGTH ) ) + if ( ( cbLocaleName == 0 ) || ( cbLocaleName >= ( sizeof(WCHAR) * NORM_LOCALE_NAME_MAX_LENGTH ) ) ) { return ErrERRCheck( JET_errDatabaseCorrupted ); } @@ -17050,8 +17051,13 @@ INLINE ERR ErrCATIParseLocaleNameInfo( Assert( 0 == LOSStrCompareW( wszLocaleEntryKey, wszExpectedLocaleName, LOSStrLengthW( wszExpectedLocaleName ) ) ); #endif // DEBUG - StringCchCopyW( wszLocaleName, cchLocaleName + 1, wszCurr ); - wszLocaleName[cchLocaleName] = L'\0'; + // Note that we're fibbing about the size of wszLocaleName in order to only + // copy a limited number of bytes from wszCurr. What we need is + // ErrOSStrCbCopyNW, where we can specify both the size of the destination + // buffer as well as the number of bytes to copy. 
+ errT = ErrOSStrCbCopyW( wszLocaleName, cbLocaleName + sizeof( WCHAR ), wszCurr ); + Assert( JET_errSuccess == errT || JET_errBufferTooSmall == errT ); + wszLocaleName[ cbLocaleName / sizeof( WCHAR ) ] = L'\0'; // // second, grab the Sort Version out of the key @@ -17079,17 +17085,19 @@ INLINE ERR ErrCATIParseLocaleNameInfo( } wszCurr++; - ULONG cchSortID = wcscspn( wszCurr, L"," ); + ULONG cbSortID = sizeof( WCHAR ) * wcscspn( wszCurr, L"," ); C_ASSERT( _countof( wszSortID ) == PERSISTED_SORTID_MAX_LENGTH ); - if ( ( cchSortID == 0 ) || ( cchSortID != PERSISTED_SORTID_MAX_LENGTH - 1 ) ) + if ( ( cbSortID == 0 ) || ( cbSortID != (sizeof( WCHAR ) * ( PERSISTED_SORTID_MAX_LENGTH - 1 ) ) ) ) { AssertSz( fFalse, "The sort ID was not of the right size. Should be exactly the size we put in." ); return ErrERRCheck( JET_errDatabaseCorrupted ); } - StringCchCopyW( wszSortID, cchSortID + 1, wszCurr ); - wszSortID[cchSortID] = L'\0'; + // Again, fibbing. + errT = ErrOSStrCbCopyW( wszSortID, cbSortID + sizeof( WCHAR ), wszCurr ); + Assert( JET_errSuccess == errT || JET_errBufferTooSmall == errT ); + wszSortID[ cbSortID / sizeof( WCHAR ) ] = L'\0'; SortIDWsz( wszSortID, psortID ); // diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index 49f4b469..d2e39137 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -44,7 +44,6 @@ i.e Assignment changes ownership -- like the unique_ptr template *******************************************************************/ #include "std.hxx" - #include // required for _alloca() // We have moved all globals / statics involving g_cbPage out of @@ -6507,7 +6506,7 @@ ERR ErrAccumulatePageStats( // ================================================================ -VOID CPAGE::DumpAllocMap_( _TCHAR * rgchBuf, CPRINTF * pcprintf ) const +VOID CPAGE::DumpAllocMap_( CHAR * rgchBuf, CPRINTF * pcprintf ) const // ================================================================ // // Prints a 'map' of the page, showing 
how it is used. @@ -6530,7 +6529,7 @@ VOID CPAGE::DumpAllocMap_( _TCHAR * rgchBuf, CPRINTF * pcprintf ) const // header for ( ich = 0; ich < CbPageHeader(); ++ich ) { - rgchBuf[ich+ichBase] = _T( 'H' ); + rgchBuf[ich+ichBase] = 'H'; } ichBase = ich; @@ -6543,7 +6542,7 @@ VOID CPAGE::DumpAllocMap_( _TCHAR * rgchBuf, CPRINTF * pcprintf ) const Assert( ptag->Ib( FSmallPageFormat() ) < min( CbPage(), m_platchManager->CbBuffer( m_bfl ) ) ); for ( ich = ptag->Ib( FSmallPageFormat() ); ich < (ptag->Cb( FSmallPageFormat() ) + ptag->Ib( FSmallPageFormat() )); ++ich ) { - rgchBuf[ich+ichBase] = _T( 'E' ); + rgchBuf[ich+ichBase] = 'E'; } for ( itag = 1; itag < ITagMicFree_(); ++itag ) @@ -6573,7 +6572,7 @@ VOID CPAGE::DumpAllocMap_( _TCHAR * rgchBuf, CPRINTF * pcprintf ) const ich = ptagT->Ib( FSmallPageFormat() ); for ( ; ich < ((ptagT->Cb( FSmallPageFormat() )) + (ptagT->Ib( FSmallPageFormat() ))); ++ich ) { - rgchBuf[ich+ichBase] = ( iptag % 2 ) ? _T( '%' ) : _T( '#' ); + rgchBuf[ich+ichBase] = ( iptag % 2 ) ? 
'%' : '#'; } } @@ -6583,19 +6582,19 @@ VOID CPAGE::DumpAllocMap_( _TCHAR * rgchBuf, CPRINTF * pcprintf ) const for ( ich = 0; ich < (INT) ( sizeof( CPAGE::TAG ) * ITagMicFree_() ); ++ich ) { - rgchBuf[ich+ichBase] = _T( 'T' ); + rgchBuf[ich+ichBase] = 'T'; } // print the map for ( INT iRow = 0; iRow < (INT)m_platchManager->CbBuffer( m_bfl )/cchDumpAllocRow; ++iRow ) { - _TCHAR rgchLineBuf[cchDumpAllocRow+1+1]; - UtilMemCpy( rgchLineBuf, &(rgchBuf[iRow*cchDumpAllocRow]), cchDumpAllocRow * sizeof( _TCHAR ) ); - rgchLineBuf[cchDumpAllocRow] = _T( '\n' ); + CHAR rgchLineBuf[cchDumpAllocRow+1+1]; + UtilMemCpy( rgchLineBuf, &(rgchBuf[iRow*cchDumpAllocRow]), cchDumpAllocRow * sizeof( CHAR ) ); + rgchLineBuf[cchDumpAllocRow] = '\n'; rgchLineBuf[cchDumpAllocRow+1] = 0; (*pcprintf)( "%s", rgchLineBuf ); } - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); } // ================================================================ @@ -6603,12 +6602,12 @@ ERR CPAGE::DumpAllocMap( CPRINTF * pcprintf ) const // ================================================================ { const INT cchBuf = CbPage(); - _TCHAR rgchBuf[g_cbPageMax]; + CHAR rgchBuf[g_cbPageMax]; for ( INT ich = 0; ich < cchBuf && ich < (sizeof(rgchBuf)/sizeof(rgchBuf[0])); ++ich ) { // we have to use a loop, not memset, so this will work with unicode - rgchBuf[ich] = _T( '.' 
); + rgchBuf[ich] = '.'; } #pragma prefast( pop ) @@ -6640,7 +6639,7 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if ( ErrEnumTags( ErrAccumulatePageStats, (void*)&btsPageSpace ) < JET_errSuccess ) { - (*pcprintf)( _T( "Failed to accumulate page stats!\n" ) ); + (*pcprintf)( "Failed to accumulate page stats!\n" ); } for ( INT itag = 0; itag < ITagMicFree_(); ++itag ) @@ -6663,7 +6662,7 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const { if ( itag < CTagReserved_() ) { - (*pcprintf)( _T( "TAG %3d: cb:0x%04x,ib:0x%04x offset:0x%04x-0x%04x flags:0x%04x %s" ), + (*pcprintf)( "TAG %3d: cb:0x%04x,ib:0x%04x offset:0x%04x-0x%04x flags:0x%04x %s", itag, ptag->Cb( FSmallPageFormat() ), ptag->Ib( FSmallPageFormat() ), @@ -6684,7 +6683,7 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if( ErrSPREPAIRValidateSpaceNode( &kdf, &pgnoLast, &cpgExtent, &cwszPoolName ) >= JET_errSuccess ) { (*pcprintf)( - _T( "TAG %3d: cb=0x%04x,ib=0x%04x SP: %ws: %d,%d-%d flags=0x%04x %s" ), + "TAG %3d: cb=0x%04x,ib=0x%04x SP: %ws: %d,%d-%d flags=0x%04x %s", itag, ptag->Cb( FSmallPageFormat() ), ptag->Ib( FSmallPageFormat() ), @@ -6700,7 +6699,7 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if ( !fHandledSpecialCase ) { - (*pcprintf)( _T( "TAG %3d: cb:0x%04x,ib:0x%04x prefix:cb=0x%04x suffix:cb=0x%04x data:cb=0x%04x offset:0x%04x-0x%04x flags:0x%04x %s" ), + (*pcprintf)( "TAG %3d: cb:0x%04x,ib:0x%04x prefix:cb=0x%04x suffix:cb=0x%04x data:cb=0x%04x offset:0x%04x-0x%04x flags:0x%04x %s", itag, ptag->Cb( FSmallPageFormat() ), ptag->Ib( FSmallPageFormat() ), @@ -6721,7 +6720,7 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if ( itag < CTagReserved_() ) { (*pcprintf)( - _T( "TAG %3d: pb=0x%I64x,cb=0x%04x,ib=0x%04x flags=0x%04x %s" ), + "TAG %3d: pb=0x%I64x,cb=0x%04x,ib=0x%04x flags=0x%04x %s", itag, __int64( dwAddress ), ptag->Cb( FSmallPageFormat() ), @@ -6741,7 +6740,7 @@ ERR 
CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if( ErrSPREPAIRValidateSpaceNode( &kdf, &pgnoLast, &cpgExtent, &wsczPoolName ) >= JET_errSuccess ) { (*pcprintf)( - _T( "TAG %3d: pb=0x%I64x,cb=0x%04x,ib=0x%04x SP: %ws: %d,%d-%d flags=0x%04x %s" ), + "TAG %3d: pb=0x%I64x,cb=0x%04x,ib=0x%04x SP: %ws: %d,%d-%d flags=0x%04x %s", itag, __int64( dwAddress ), ptag->Cb( FSmallPageFormat() ), @@ -6760,7 +6759,7 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const { // If nothing could recognize and handle the special case, then print out a regular line. (*pcprintf)( - _T( "TAG %3d: pb=0x%I64x,cb=0x%04x,ib=0x%04x prefix:cb=0x%04x suffix:pb=0x%I64x,cb=0x%04x data:pb=0x%I64x,cb=0x%04x flags=0x%04x %s" ), + "TAG %3d: pb=0x%I64x,cb=0x%04x,ib=0x%04x prefix:cb=0x%04x suffix:pb=0x%I64x,cb=0x%04x data:pb=0x%I64x,cb=0x%04x flags=0x%04x %s", itag, __int64( dwAddress ), ptag->Cb( FSmallPageFormat() ), @@ -6793,44 +6792,44 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if ( 0 == ITagMicFree_() ) { - (*pcprintf)( _T( "[No tags found]\n" ) ); + (*pcprintf)( "[No tags found]\n" ); } else { - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); if ( CStatsFromPv(btsPageSpace.phistoNodeCounts)->C() ) { Assert( CStatsFromPv( btsPageSpace.phistoNodeCounts )->C() == CTagReserved_() ); Assert( CStatsFromPv(btsPageSpace.phistoNodeCounts)->Min() == CStatsFromPv(btsPageSpace.phistoNodeCounts)->Ave() ); Assert( CStatsFromPv(btsPageSpace.phistoNodeCounts)->Max() == CStatsFromPv(btsPageSpace.phistoNodeCounts)->Ave() ); - (*pcprintf)( _T( "Nodes: %I64d\n" ), + (*pcprintf)( "Nodes: %I64d\n", CStatsFromPv(btsPageSpace.phistoNodeCounts)->Ave(), CStatsFromPv(btsPageSpace.phistoKeyCompression)->C(), CStatsFromPv(btsPageSpace.phistoUnreclaimedBytes)->C() ); - (*pcprintf)( _T( " min, ave, max, total\n" ) ); - (*pcprintf)( _T( " Logical Key Sizes: %5I64d, %6.1f, %5I64d, %5I64d\n" ), + (*pcprintf)( " min, ave, max, total\n" ); + (*pcprintf)( " Logical Key 
Sizes: %5I64d, %6.1f, %5I64d, %5I64d\n", CStatsFromPv(btsPageSpace.phistoKeySizes)->Min(), CStatsFromPv(btsPageSpace.phistoKeySizes)->DblAve(), CStatsFromPv(btsPageSpace.phistoKeySizes)->Max(), CStatsFromPv(btsPageSpace.phistoKeySizes)->Total() ); if ( CStatsFromPv(btsPageSpace.phistoKeyCompression)->C() ) { - (*pcprintf)( _T( " Key Compression: %5I64d, %6.1f, %5I64d, %5I64d (nodes=%I64d)\n" ), + (*pcprintf)( " Key Compression: %5I64d, %6.1f, %5I64d, %5I64d (nodes=%I64d)\n", CStatsFromPv(btsPageSpace.phistoKeyCompression)->Min(), CStatsFromPv(btsPageSpace.phistoKeyCompression)->DblAve(), CStatsFromPv(btsPageSpace.phistoKeyCompression)->Max(), CStatsFromPv(btsPageSpace.phistoKeyCompression)->Total(), CStatsFromPv(btsPageSpace.phistoKeyCompression)->C() ); } - (*pcprintf)( _T( " Node Data Sizes: %5I64d, %6.1f, %5I64d, %5I64d\n" ), + (*pcprintf)( " Node Data Sizes: %5I64d, %6.1f, %5I64d, %5I64d\n", CStatsFromPv(btsPageSpace.phistoDataSizes)->Min(), CStatsFromPv(btsPageSpace.phistoDataSizes)->DblAve(), CStatsFromPv(btsPageSpace.phistoDataSizes)->Max(), CStatsFromPv(btsPageSpace.phistoDataSizes)->Total() ); if ( CStatsFromPv(btsPageSpace.phistoUnreclaimedBytes)->C() ) { - (*pcprintf)( _T( " Unreclaimed Space: %5I64d, %6.1f, %5I64d, %5I64d (nodes=%I64d)\n" ), + (*pcprintf)( " Unreclaimed Space: %5I64d, %6.1f, %5I64d, %5I64d (nodes=%I64d)\n", CStatsFromPv(btsPageSpace.phistoUnreclaimedBytes)->Min(), CStatsFromPv(btsPageSpace.phistoUnreclaimedBytes)->DblAve(), CStatsFromPv(btsPageSpace.phistoUnreclaimedBytes)->Max(), @@ -6840,12 +6839,12 @@ ERR CPAGE::DumpTags( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const } else { - (*pcprintf)( _T( " No nodes, except maybe external header. No stats.\n" ) ); + (*pcprintf)( " No nodes, except maybe external header. 
No stats.\n" ); } } - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); return 0; } @@ -6871,7 +6870,7 @@ VOID CPAGE::DumpTag( CPRINTF * pcprintf, const INT iline, const DWORD_PTR dwOffs ptag->FFlags( this, FSmallPageFormat() ) & fNDCompressed ? 'c' : ' ' ); (*pcprintf)( - _T( "TAG %d: pb=0x%I64x,cb=0x%04x,ib=0x%04x prefix:cb=0x%04x suffix:pb=0x%I64x,cb=0x%04x data:pb=0x%I64x,cb=0x%04x flags=0x%04x %s\n" ), + "TAG %d: pb=0x%I64x,cb=0x%04x,ib=0x%04x prefix:cb=0x%04x suffix:pb=0x%I64x,cb=0x%04x data:pb=0x%I64x,cb=0x%04x flags=0x%04x %s\n", itag, __int64( PbFromIb_( 0 ) + ptag->Ib( FSmallPageFormat() ) + dwOffset ), ptag->Cb( FSmallPageFormat() ), @@ -6904,7 +6903,7 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const DumpPageChecksumInfo( m_bfl.pv, m_platchManager->CbBuffer( m_bfl ), databasePage, m_pgno, pcprintf ); const __int64 chksumLogPage = LoggedDataChecksum().rgChecksum[0]; (*pcprintf)( " logged data checksum = %16I64x\n", chksumLogPage ); - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); (*pcprintf)( FORMAT_INT( CPAGE::PGHDR, (PGHDR*)m_bfl.pv, checksum, dwOffset ) ); (*pcprintf)( FORMAT_UINT( CPAGE::PGHDR, (PGHDR*)m_bfl.pv, dbtimeDirtied, dwOffset ) ); @@ -6929,29 +6928,29 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if( FLeafPage() ) { - (*pcprintf)( _T( "\t\tLeaf page\n" ) ); + (*pcprintf)( "\t\tLeaf page\n" ); } if( FParentOfLeaf() ) { - (*pcprintf)( _T( "\t\tParent of leaf\n" ) ); + (*pcprintf)( "\t\tParent of leaf\n" ); } if( FInvisibleSons() ) { - (*pcprintf)( _T( "\t\tInternal page\n" ) ); + (*pcprintf)( "\t\tInternal page\n" ); } if( FRootPage() ) { - (*pcprintf)( _T( "\t\tRoot page\n" ) ); + (*pcprintf)( "\t\tRoot page\n" ); } BOOL fNewExtHdrFormat = fFalse; BYTE fNodeFlag = 0; if( FFDPPage() ) { - (*pcprintf)( _T( "\t\tFDP page\n" ) ); + (*pcprintf)( "\t\tFDP page\n" ); const TAG * const ptag = PtagFromItag_( 0 ); @@ -6964,7 +6963,7 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) 
const || ptag->Ib( FSmallPageFormat() ) < 0 || ptag->Ib( FSmallPageFormat() ) > m_platchManager->CbBuffer( m_bfl ) - CbPageHeader() - sizeof(TAG) ) { - (*pcprintf)( _T( "\t\tCorrupted External Header\n" ) ); + (*pcprintf)( "\t\tCorrupted External Header\n" ); } else { @@ -6978,14 +6977,14 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const fNodeFlag = *pb; ++pb; - (*pcprintf)( _T( "\t\tNew external header format\n" ) ); + (*pcprintf)( "\t\tNew external header format\n" ); if ( fNodeFlag & BNDIGetPersistedNrfFlag( noderfSpaceHeader ) ) { - (*pcprintf)( _T( "\t\t\tSpace header flag presents\n" ) ); + (*pcprintf)( "\t\t\tSpace header flag presents\n" ); } if ( fNodeFlag & BNDIGetPersistedNrfFlag( noderfIsamAutoInc ) ) { - (*pcprintf)( _T( "\t\t\tAutoInc flag presents\n" ) ); + (*pcprintf)( "\t\t\tAutoInc flag presents\n" ); } const USHORT usTagSize = ptag->Cb( FSmallPageFormat() ); @@ -7002,7 +7001,7 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const // check the expected tag size is consistent with the flag if ( usExpectedTagSize != usTagSize ) { - (*pcprintf)( _T( "\t\tCorrupted Extended External Header. External header flag %d, Expected external header size %d, actual size %d.\n" ), + (*pcprintf)( "\t\tCorrupted Extended External Header. External header flag %d, Expected external header size %d, actual size %d.\n", fNodeFlag, usExpectedTagSize, usTagSize ); @@ -7016,7 +7015,7 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const } else { - (*pcprintf)( _T( "Corruption, on a FDP page with no space header!" ) ); + (*pcprintf)( "Corruption, on a FDP page with no space header!" ); AssertSz( fFalse, "Corruption, on an FDP page with no space header!" 
); } @@ -7036,35 +7035,35 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if ( psph->FMultipleExtent() ) { (*pcprintf)( - _T( "\t\t\tMultiple Extent Space (ParentFDP: %d, pgnoOE: %d)\n" ), + "\t\t\tMultiple Extent Space (ParentFDP: %d, pgnoOE: %d)\n", psph->PgnoParent(), psph->PgnoOE() ); } else { - (*pcprintf)( _T( "\t\t\tSingle Extent Space (ParentFDP: %d, CpgPri: %d, AvailBitmap: 0x%08X)\n" ), psph->PgnoParent(), psph->CpgPrimary(), psph->RgbitAvail() ); + (*pcprintf)( "\t\t\tSingle Extent Space (ParentFDP: %d, CpgPri: %d, AvailBitmap: 0x%08X)\n", psph->PgnoParent(), psph->CpgPrimary(), psph->RgbitAvail() ); } } if ( fNeedPrintAutoInc ) { - (*pcprintf)( _T( "\t\t\tAuto increment maximum: %d\n" ), qwAutoInc ); + (*pcprintf)( "\t\t\tAuto increment maximum: %d\n", qwAutoInc ); } } } if( FEmptyPage() ) { - (*pcprintf)( _T( "\t\tEmpty page\n" ) ); + (*pcprintf)( "\t\tEmpty page\n" ); } if( FPreInitPage() ) { - (*pcprintf)( _T( "\t\tPre-init page\n" ) ); + (*pcprintf)( "\t\tPre-init page\n" ); } if( FSpaceTree() ) { - (*pcprintf)( _T( "\t\tSpace tree page" ) ); + (*pcprintf)( "\t\tSpace tree page" ); if ( FRootPage() ) { @@ -7073,7 +7072,7 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const || ptag->Ib( FSmallPageFormat() ) < 0 || ptag->Ib( FSmallPageFormat() ) > m_platchManager->CbBuffer( m_bfl ) - CbPageHeader() - sizeof(TAG) ) { - (*pcprintf)( _T( "\tCorrupted Split Buffer!\n" ) ); + (*pcprintf)( "\tCorrupted Split Buffer!\n" ); } else { @@ -7082,56 +7081,56 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const 0 == pslitbuf->CpgBuffer2() ) { - (*pcprintf)( _T( " (spbuf: none)\n" ) ); + (*pcprintf)( " (spbuf: none)\n" ); } else { - (*pcprintf)( _T( " (spbuf:" ) ); + (*pcprintf)( " (spbuf:" ); if ( pslitbuf->CpgBuffer1() ) { - (*pcprintf)( _T( " buf1: %d-%d (%d)" ), + (*pcprintf)( " buf1: %d-%d (%d)", pslitbuf->PgnoLastBuffer1() - pslitbuf->CpgBuffer1() + 1, pslitbuf->PgnoLastBuffer1(), 
pslitbuf->CpgBuffer1() ); } if ( pslitbuf->CpgBuffer2() ) { - (*pcprintf)( _T( " buf2: %d-%d (%d)" ), + (*pcprintf)( " buf2: %d-%d (%d)", pslitbuf->PgnoLastBuffer2() - pslitbuf->CpgBuffer2() + 1, pslitbuf->PgnoLastBuffer2(), pslitbuf->CpgBuffer2() ); } - (*pcprintf)( _T( ") \n" ) ); + (*pcprintf)( ") \n" ); } } } else { - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); } } if( FRepairedPage() ) { - (*pcprintf)( _T( "\t\tRepaired page\n" ) ); + (*pcprintf)( "\t\tRepaired page\n" ); } if( FPrimaryPage() ) { - (*pcprintf)( _T( "\t\tPrimary page\n" ) ); + (*pcprintf)( "\t\tPrimary page\n" ); Assert( !FIndexPage() ); } if( FIndexPage() ) { - (*pcprintf)( _T( "\t\tIndex page " ) ); + (*pcprintf)( "\t\tIndex page " ); if ( FNonUniqueKeys() ) { - (*pcprintf)( _T( "(non-unique keys)\n" ) ); + (*pcprintf)( "(non-unique keys)\n" ); } else { - (*pcprintf)( _T( "(unique keys)\n" ) ); + (*pcprintf)( "(unique keys)\n" ); } } else @@ -7141,36 +7140,36 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const if( FLongValuePage() ) { - (*pcprintf)( _T( "\t\tLong Value page\n" ) ); + (*pcprintf)( "\t\tLong Value page\n" ); } if( FNewRecordFormat() ) { - (*pcprintf)( _T( "\t\tNew record format\n" ) ); + (*pcprintf)( "\t\tNew record format\n" ); } if( FNewChecksumFormat() ) { - (*pcprintf)( _T( "\t\tNew checksum format\n" ) ); + (*pcprintf)( "\t\tNew checksum format\n" ); } if( FScrubbed() ) { - (*pcprintf)( _T( "\t\tScrubbed\n" ) ); + (*pcprintf)( "\t\tScrubbed\n" ); } if ( FPageFDPRootDelete() ) { - (*pcprintf)( _T( "\t\tFDP Root Delete Page\n" ) ); + (*pcprintf)( "\t\tFDP Root Delete Page\n" ); } else if ( FPageFDPDelete() ) { - (*pcprintf)( _T( "\t\tFDP Delete Page\n" ) ); + (*pcprintf)( "\t\tFDP Delete Page\n" ); } - (*pcprintf)( _T( "\t\tPageFlushType = %d\n" ), Pgft() ); + (*pcprintf)( "\t\tPageFlushType = %d\n", Pgft() ); - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); return JET_errSuccess; } diff --git a/dev/ese/src/ese/cresmgr.cxx 
b/dev/ese/src/ese/cresmgr.cxx index 51d5f28e..a665df28 100644 --- a/dev/ese/src/ese/cresmgr.cxx +++ b/dev/ese/src/ese/cresmgr.cxx @@ -1882,7 +1882,7 @@ VOID CResourceManager::MarkAsFreed__( //====================================== VOID CResourceManager::IDumpAlloc( const WCHAR* szDumpFile ) { - CPRINTFFILE cprintf( szDumpFile ); + CPRINTFFILE cprintf( szDumpFile, CPRINTFFILE::FILEENCODING::ASCII ); cprintf( "ResourceID = %i Tag: \"%.*s\" object size = %i\r\n\r\n", ResID(), @@ -3057,7 +3057,7 @@ VOID OSRMPostterm() fOSRMPreinitPostTerm = fTrue; #ifdef RM_STATISTICS - CPRINTFFILE cprintf( "rmstat.txt" ); + CPRINTFFILE cprintf( "rmstat.txt", fFalse ); cprintf( "\r\n\r\n%ws\r\n", WszUtilProcessName() ); cprintf( "RESID Alloc (RFOLWait) Free (WaitChnk WaitLoop WaitSucc) ( LAGACX LAGFst LAGHit) ( LARACX LARFst LARHit)\r\n" ); cprintf( "=================================================================================================================" ); diff --git a/dev/ese/src/ese/dataserializer.cxx b/dev/ese/src/ese/dataserializer.cxx index c4dcbf53..814ee272 100644 --- a/dev/ese/src/ese/dataserializer.cxx +++ b/dev/ese/src/ese/dataserializer.cxx @@ -5,7 +5,6 @@ #include "PageSizeClean.hxx" - // ================================================================ // CPRINTFBUFFER // ================================================================ @@ -21,12 +20,12 @@ class CPRINTFBUFFER : public CPRINTF virtual ~CPRINTFBUFFER() {} const char * SzBuffer() const { return m_szBuffer; } - void __cdecl operator()( const _TCHAR* szFormat, ... ) + void __cdecl operator()( const CHAR* szFormat, ... 
) { va_list arg_ptr; va_start( arg_ptr, szFormat ); const size_t cchBufferUsed = strlen( m_szBuffer ); - StringCbVPrintfA( m_szBuffer + cchBufferUsed, m_cchBuffer - cchBufferUsed, szFormat, arg_ptr ); + OSStrCbVFormatA( m_szBuffer + cchBufferUsed, m_cchBuffer - cchBufferUsed, szFormat, arg_ptr ); va_end( arg_ptr ); } diff --git a/dev/ese/src/ese/dbscan.cxx b/dev/ese/src/ese/dbscan.cxx index 71eadf8f..95a4670e 100644 --- a/dev/ese/src/ese/dbscan.cxx +++ b/dev/ese/src/ese/dbscan.cxx @@ -4652,9 +4652,9 @@ DBMScan::DBMScan( m_ppib( ppib ), m_cscanobservers( 0 ), m_threadDBMScan( 0 ), - m_critSignalControl( CLockBasicInfo( CSyncBasicInfo( _T("DBMScan::m_critSignalControl" ) ), rankDBMScanSignalControl, 0 ) ), - m_msigDBScanStop( CSyncBasicInfo( _T("DBMScan::m_msigDBScanStop" ) ) ), - m_msigDBScanGo( CSyncBasicInfo( _T("DBMScan::m_msigDBScanGo" ) ) ), + m_critSignalControl( CLockBasicInfo( CSyncBasicInfo( "DBMScan::m_critSignalControl" ), rankDBMScanSignalControl, 0 ) ), + m_msigDBScanStop( CSyncBasicInfo( "DBMScan::m_msigDBScanStop" ) ), + m_msigDBScanGo( CSyncBasicInfo( "DBMScan::m_msigDBScanGo" ) ), m_pidbmScanSerializationObj( NULL ), m_cscansFinished( 0 ), m_fNeedToSuspendPass( false ), @@ -5326,7 +5326,7 @@ DWORD DBMScanSerializer::DwTimeSlice() const DBMScanSerializer::DBMScanSerializer( const ULONG_PTR ulKey ) : IDBMScanSerializer( ulKey, IDBMScanSerializer::idbmstypReal ), - m_critSerializer( CLockBasicInfo( CSyncBasicInfo( _T("DBMScanSerializer::m_critSerializer" ) ), rankDBMScanSerializer, 0 ) ), + m_critSerializer( CLockBasicInfo( CSyncBasicInfo( "DBMScanSerializer::m_critSerializer" ), rankDBMScanSerializer, 0 ) ), m_ilDbmScans() { } @@ -5541,7 +5541,7 @@ bool DBMScanSerializerFactory::FSerializerFactoryEmpty() } DBMScanSerializerFactory::DBMScanSerializerFactory() : - m_critSerializer( CLockBasicInfo( CSyncBasicInfo( _T("DBMScanSerializerFactory::m_critSerializer" ) ), rankDBMScanSerializerFactory, 0 ) ), + m_critSerializer( CLockBasicInfo( CSyncBasicInfo( 
"DBMScanSerializerFactory::m_critSerializer" ), rankDBMScanSerializerFactory, 0 ) ), m_ilSerializers(), m_cDummySerializers( 0 ) { @@ -6773,7 +6773,7 @@ TestDBMScanObserver::TestDBMScanObserver() : m_fPrepareToTermCalled( false ), m_cpgRead( 0 ), m_pgnoBadChecksum( pgnoNull ), - m_asigFinishedPass( CSyncBasicInfo( _T("TestDBMScanObserver::asigFinishedPass" ) ) ) + m_asigFinishedPass( CSyncBasicInfo( "TestDBMScanObserver::asigFinishedPass" ) ) { } @@ -7166,7 +7166,7 @@ TestDBMScanReader::TestDBMScanReader( const PGNO pgnoLast, const PGNO pgnoBadChe m_fInError( fFalse ), m_pgnoLast( pgnoLast ), m_pgnoBadChecksum( pgnoBadChecksum ), - m_msigReadPageCalled( CSyncBasicInfo( _T("TestDBMScanReader::msigReadPageCalled" ) ) ) + m_msigReadPageCalled( CSyncBasicInfo( "TestDBMScanReader::msigReadPageCalled" ) ) { } diff --git a/dev/ese/src/ese/dbutil.cxx b/dev/ese/src/ese/dbutil.cxx index ec57b4b1..dc1ac32b 100644 --- a/dev/ese/src/ese/dbutil.cxx +++ b/dev/ese/src/ese/dbutil.cxx @@ -73,7 +73,7 @@ VOID DBUTLSprintHex( if ( cbAddress ) { - StringCbPrintfA( szDestCurrent, szDestMax - szDestCurrent + 1, "%*.*lx ", cbAddress, cbAddress, (INT)(pb - rgbSrc + cbStart) ); + OSStrCbFormatA( szDestCurrent, szDestMax - szDestCurrent + 1, "%*.*lx ", cbAddress, cbAddress, (INT)(pb - rgbSrc + cbStart) ); (*szDestMax) = 0; szDestCurrent += strlen(szDestCurrent); diff --git a/dev/ese/src/ese/fmp.cxx b/dev/ese/src/ese/fmp.cxx index 2e42ed77..1382e2aa 100644 --- a/dev/ese/src/ese/fmp.cxx +++ b/dev/ese/src/ese/fmp.cxx @@ -321,16 +321,16 @@ FMP::FMP() : CZeroInit( sizeof( FMP ) ), m_critLatch( CLockBasicInfo( CSyncBasicInfo( szFMP ), rankFMP, 0 ) ), m_critDbtime( CLockBasicInfo( CSyncBasicInfo( szDbtime ), rankDbtime, 0 ) ), - m_gateWriteLatch( CSyncBasicInfo( _T( "FMP::m_gateWriteLatch" ) ) ), + m_gateWriteLatch( CSyncBasicInfo( "FMP::m_gateWriteLatch" ) ), m_semRangeLock( CSyncBasicInfo( "FMP::m_semRangeLock" ) ), m_dbid( dbidMax ), m_rwlDetaching( CLockBasicInfo( CSyncBasicInfo( szFMPDetaching 
), rankFMPDetaching, 0 ) ), m_rwlBFContext( CLockBasicInfo( CSyncBasicInfo( szBFFMPContext ), rankBFFMPContext, 0 ) ), m_sxwlRedoMaps( CLockBasicInfo( CSyncBasicInfo( szFMPRedoMaps ), rankFMPRedoMaps, 0 ) ), - m_semIOSizeChange( CSyncBasicInfo( _T( "FMP::m_semIOSizeChange" ) ) ), + m_semIOSizeChange( CSyncBasicInfo( "FMP::m_semIOSizeChange" ) ), m_cAsyncIOForViewCachePending( 0 ), - m_semTrimmingDB( CSyncBasicInfo( _T( "FMP::m_semTrimmingDB" ) ) ), - m_semDBM( CSyncBasicInfo( _T( "FMP::m_semDBM" ) ) ), + m_semTrimmingDB( CSyncBasicInfo( "FMP::m_semTrimmingDB" ) ), + m_semDBM( CSyncBasicInfo( "FMP::m_semDBM" ) ), m_isdlCreate( isdltypeCreate ), m_isdlAttach( isdltypeAttach ), m_isdlDetach( isdltypeDetach ), diff --git a/dev/ese/src/ese/io.cxx b/dev/ese/src/ese/io.cxx index 70158456..e3338bec 100644 --- a/dev/ese/src/ese/io.cxx +++ b/dev/ese/src/ese/io.cxx @@ -4607,7 +4607,7 @@ ERR ErrBeginDatabaseIncReseedTracing_( _In_ IFileSystemAPI * pfsapi, _In_ JET_PC // create the tracing file - CPRINTF * const pcprintfAlloc = new CPRINTFFILE( wszIrsRawFile ); + CPRINTF * const pcprintfAlloc = new CPRINTFFILE( wszIrsRawFile, CPRINTFFILE::FILEENCODING::ASCII ); Alloc( pcprintfAlloc ); // avoid clobbering the default / NULL tracer // set tracing to goto the tracing file @@ -6903,7 +6903,7 @@ ERR ErrIOReadDbPages( PGNO pgnoMaxDb = pgnoEnd + 1; volatile LONG acRead = 0; - CAutoResetSignal asigDone( CSyncBasicInfo( _T( "ErrIOReadDbPages::asigDone" ) ) ); + CAutoResetSignal asigDone( CSyncBasicInfo( "ErrIOReadDbPages::asigDone" ) ); READPAGE_DATA readdata; readdata.err = JET_errSuccess; diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 571c7a51..425c01a2 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -2622,14 +2622,17 @@ LOCAL const BYTE bProcFriendlyNameAggregationID = 1; LONG LProcFriendlyNameICFLPwszPpb( _In_ LONG icf, _Inout_opt_ void* const pvParam1, _Inout_opt_ void* const pvParam2 ) { + ERR err; + switch ( icf ) { case 
ICFInit: { // Even though we'll truncate the string at cchPerfmonInstanceNameMax, - // the OSStrCbFormatW() will assert if it truncates the string. - (void) ErrOSStrCbFormatW( g_wszProcName, sizeof(g_wszProcName), L"%ws\0" , WszUtilProcessFriendlyName() ); - + // OSStrCbFormatW() will Assert on anything but JET_errSuccess, and + // truncation is not JET_errSuccess. + err = ErrOSStrCbFormatW( g_wszProcName, sizeof(g_wszProcName), L"%ws\0" , WszUtilProcessFriendlyName() ); + Assert( err == JET_errSuccess || err == JET_errBufferTooSmall ); AtomicExchange( &g_lRefreshPerfInstanceList, 1 ); return 1; diff --git a/dev/ese/src/ese/logapi.cxx b/dev/ese/src/ese/logapi.cxx index 51653b13..dced7fdf 100644 --- a/dev/ese/src/ese/logapi.cxx +++ b/dev/ese/src/ese/logapi.cxx @@ -5169,7 +5169,7 @@ const char * SzLrtyp( LRTYP lrtyp ) ERR ErrLrToLogCsvSimple( - CWPRINTFFILE * pcwpfCsvOut, + CPRINTFFILE * pcpfCsvOutW, LGPOS lgpos, const LR *plr, LOG * plog ) @@ -6853,7 +6853,7 @@ And here was the list of the log file after recovery szLgposLR, cbLR, pChangeInfo[i].ulChecksum1, pChangeInfo[i].ulChecksum2 ); OSStrCbAppendW( szLR, sizeof(szLR), rgwchBuf ); - (*pcwpfCsvOut)( L"%s", szLR ); + (*pcpfCsvOutW)( L"%s", szLR ); } else if ( pLogRecordsCsvFormats[i] == szLogRecordDatabaseInfo ) { @@ -6871,7 +6871,7 @@ And here was the list of the log file after recovery pChangeInfo[i].ulChecksum1, pChangeInfo[i].ulChecksum2, szLRTyp, pChangeInfo[i].dbid, pChangeInfo[i].szDbPath, rgwchSignBuf ); OSStrCbAppendW( szLR, sizeof(szLR), rgwchBuf ); - (*pcwpfCsvOut)( L"%s", szLR ); + (*pcpfCsvOutW)( L"%s", szLR ); } else if ( pLogRecordsCsvFormats[i] == szLogRecordPgChangeInfo ) { @@ -6883,7 +6883,7 @@ And here was the list of the log file after recovery szLRTyp, pChangeInfo[i].pgno, pChangeInfo[i].objid, pChangeInfo[i].dbid, pChangeInfo[i].dbtimePre, pChangeInfo[i].dbtimePost ); OSStrCbAppendW( szLR, sizeof(szLR), rgwchBuf ); - (*pcwpfCsvOut)( L"%s", szLR ); + (*pcpfCsvOutW)( L"%s", szLR ); } else if ( 
pLogRecordsCsvFormats[i] == szLogRecordMiscelLrInfo ) @@ -6895,7 +6895,7 @@ And here was the list of the log file after recovery pChangeInfo[i].ulChecksum1, pChangeInfo[i].ulChecksum2, szLRTyp ); OSStrCbAppendW( szLR, sizeof(szLR), rgwchBuf ); - (*pcwpfCsvOut)( L"%s", szLR ); + (*pcpfCsvOutW)( L"%s", szLR ); } else if ( pLogRecordsCsvFormats[i] == szLogRecordResizeDatabaseInfo || pLogRecordsCsvFormats[i] == szLogRecordTrimDatabaseInfo ) @@ -6907,14 +6907,14 @@ And here was the list of the log file after recovery pChangeInfo[i].ulChecksum1, pChangeInfo[i].ulChecksum2, szLRTyp, pChangeInfo[i].dbid ); OSStrCbAppendW( szLR, sizeof(szLR), rgwchBuf ); - (*pcwpfCsvOut)( L"%s", szLR ); + (*pcpfCsvOutW)( L"%s", szLR ); } else { AssertSz( fFalse, "Unknown CSV type!!!" ); } - (*pcwpfCsvOut)( L"\r\n" ); + (*pcpfCsvOutW)( L"\r\n" ); // Only print out size with the first csv line for a log record (to avoid double counting) cbLR = 0; } diff --git a/dev/ese/src/ese/repair.cxx b/dev/ese/src/ese/repair.cxx index b59eb0af..5ae17513 100644 --- a/dev/ese/src/ese/repair.cxx +++ b/dev/ese/src/ese/repair.cxx @@ -265,7 +265,7 @@ struct CHECKTABLE // need a constructor to initialize the signal - CHECKTABLE() : signal( CSyncBasicInfo( _T( "CHECKTABLE::signal" ) ) ) {} + CHECKTABLE() : signal( CSyncBasicInfo( "CHECKTABLE::signal" ) ) {} }; @@ -1218,7 +1218,7 @@ ERR ErrDBUTLRepair( JET_SESID sesid, const JET_DBUTIL_W *pdbutil, CPRINTF* const CallR( ErrERRCheck( JET_errInvalidParameter ) ); } OSStrCbFormatW(wszFile, sizeof( wszFile ), L"%s%s", wszPrefix, L".INTEG.RAW" ); - CPRINTFFILE cprintfFile( wszFile ); + CPRINTFFILE cprintfFile( wszFile, CPRINTFFILE::FILEENCODING::ASCII ); // we check this only if we are going to use the szFile in the next line // @@ -1231,7 +1231,7 @@ ERR ErrDBUTLRepair( JET_SESID sesid, const JET_DBUTIL_W *pdbutil, CPRINTF* const OSStrCbFormatW(wszFile, sizeof( wszFile ), L"%s%s", wszPrefix, L".INTGINFO.TXT" ); } CPRINTF * const pcprintfStatsInternal = ( 
pdbutil->grbitOptions & JET_bitDBUtilOptionStats ) ? - new CPRINTFFILE( wszFile ) : + new CPRINTFFILE( wszFile, CPRINTFFILE::FILEENCODING::ASCII ) : CPRINTFNULL::PcprintfInstance(); if ( NULL == pcprintfStatsInternal ) { diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index 7d5ca70b..ed4fae8b 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -3564,7 +3564,7 @@ RBSCleaner::RBSCleaner( IRBSCleanerConfig* const prbscleanerconfig ) : CZeroInit( sizeof( RBSCleaner ) ), m_pinst( pinst ), - m_msigRBSCleanerStop( CSyncBasicInfo( _T( "RBSCleaner::m_msigRBSCleanerStop" ) ) ), + m_msigRBSCleanerStop( CSyncBasicInfo( "RBSCleaner::m_msigRBSCleanerStop" ) ), m_critRBSFirstValidGen( CLockBasicInfo( CSyncBasicInfo( szRBSFirstValidGen ), rankRBSFirstValidGen, 0 ) ), m_prbscleaneriooperator( prbscleaneriooperator ), m_prbscleanerstate( prbscleanerstate ), @@ -3985,7 +3985,7 @@ CRBSDatabaseRevertContext::CRBSDatabaseRevertContext( _In_ INST* const pinst ) : CZeroInit( sizeof( CRBSDatabaseRevertContext ) ), m_pinst ( pinst ), m_dbidCurrent ( dbidMax ), - m_asigWritePossible( CSyncBasicInfo( _T( "CRBSDatabaseRevertContext::m_asigWritePossible" ) ) ) + m_asigWritePossible( CSyncBasicInfo( "CRBSDatabaseRevertContext::m_asigWritePossible" ) ) { Assert( pinst ); } @@ -5354,7 +5354,7 @@ ERR CRBSRevertContext::ErrBeginRevertTracing( bool fDeleteOldTraceFile ) } // create the tracing file - CPRINTF * const pcprintfAlloc = new CPRINTFFILE( wszRBSRCRawFile ); + CPRINTF * const pcprintfAlloc = new CPRINTFFILE( wszRBSRCRawFile, CPRINTFFILE::FILEENCODING::ASCII ); Alloc( pcprintfAlloc ); // avoid clobbering the default / NULL tracer // set tracing to goto the tracing file diff --git a/dev/ese/src/ese/sysver.cxx b/dev/ese/src/ese/sysver.cxx index 56eb1453..88866579 100644 --- a/dev/ese/src/ese/sysver.cxx +++ b/dev/ese/src/ese/sysver.cxx @@ -279,22 +279,20 @@ ERR ErrLGFindHighestMatchingLogMajors( _In_ const LogVersion& 
lgvFromLogHeader, } // create a formatted string at end of a given buffer +// Explicitly NOT part of the OSStr API because it's very +// inefficient in the wrong context (like a loop on a big string -void __cdecl OSStrCbAppendFormatW ( __inout_bcount(cbBuffer) PWSTR wszBuffer, size_t cbBuffer, __format_string PCWSTR cwszFormat, ... ) +void __cdecl AppendFormatW ( __inout_bcount(cbBuffer) PWSTR wszBuffer, size_t cbBuffer, __format_string PCWSTR cwszFormat, ... ) { va_list alist; va_start( alist, cwszFormat ); const size_t cbUsed = LOSStrLengthW( wszBuffer ) * sizeof(WCHAR); Assert( cbUsed < cbBuffer ); // did someone pass in an unit buffer? Assert( ( cbUsed % sizeof(WCHAR) ) == 0 ); - HRESULT hr = StringCbVPrintfW( wszBuffer + ( cbUsed / sizeof(WCHAR) ), cbBuffer - cbUsed, cwszFormat, alist ); -#ifdef DEBUG - CallS( ErrFromStrsafeHr( hr ) ); -#endif + OSStrCbVFormatW( wszBuffer + ( cbUsed / sizeof(WCHAR) ), cbBuffer - cbUsed, cwszFormat, alist ); va_end( alist ); } - // A nice formatting of the JET_paramEngineFormatValue setting ... the problem is we have an enum possibly combined with a // flag and so it is hard to translate / easily lookup if we just print it either as decimal or hex (because the EFV values // are not in hex). @@ -343,22 +341,22 @@ void FormatEfvSetting( const JET_ENGINEFORMATVERSION efvFullParam, _Out_writes_b BOOL fNeedOr = fFalse; if ( efvBaseValue != 0 ) { - OSStrCbAppendFormatW( wszEfvSetting, cbEfvSetting, L"%d", efvBaseValue ); + AppendFormatW( wszEfvSetting, cbEfvSetting, L"%d", efvBaseValue ); fNeedOr = fTrue; } if ( fJET_efvUseEngineDefault ) { - OSStrCbAppendFormatW( wszEfvSetting, cbEfvSetting, L"%hs%hs", fNeedOr ? " | " : "", "JET_efvUseEngineDefault" ); + AppendFormatW( wszEfvSetting, cbEfvSetting, L"%hs%hs", fNeedOr ? " | " : "", "JET_efvUseEngineDefault" ); fNeedOr = fTrue; } if ( fJET_efvUsePersistedFormat ) { - OSStrCbAppendFormatW( wszEfvSetting, cbEfvSetting, L"%hs%hs", fNeedOr ? 
" | " : "", "JET_efvUsePersistedFormat" ); + AppendFormatW( wszEfvSetting, cbEfvSetting, L"%hs%hs", fNeedOr ? " | " : "", "JET_efvUsePersistedFormat" ); fNeedOr = fTrue; } if ( fJET_efvAllowHigherPersistedFormat ) { - OSStrCbAppendFormatW( wszEfvSetting, cbEfvSetting, L"%hs%hs", fNeedOr ? " | " : "", "JET_efvAllowHigherPersistedFormat" ); + AppendFormatW( wszEfvSetting, cbEfvSetting, L"%hs%hs", fNeedOr ? " | " : "", "JET_efvAllowHigherPersistedFormat" ); fNeedOr = fTrue; } OSStrCbAppendW( wszEfvSetting, cbEfvSetting, L")" ); diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index ba0fb266..050fa6b5 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -7,7 +7,6 @@ #include "PageSizeClean.hxx" - ///#define BREAK_ON_PREFERRED_BUCKET_LIMIT #ifdef DEBUG @@ -174,8 +173,8 @@ VER::VER( INST *pinst ) : CZeroInit( sizeof( VER ) ), m_pinst( pinst ), m_fVERCleanUpWait( 2 ), - m_msigRCECleanPerformedRecently( CSyncBasicInfo( _T( "m_msigRCECleanPerformedRecently" ) ) ), - m_asigRCECleanDone( CSyncBasicInfo( _T( "m_asigRCECleanDone" ) ) ), + m_msigRCECleanPerformedRecently( CSyncBasicInfo( "m_msigRCECleanPerformedRecently" ) ), + m_asigRCECleanDone( CSyncBasicInfo( "m_asigRCECleanDone" ) ), m_critRCEClean( CLockBasicInfo( CSyncBasicInfo( szRCEClean ), rankRCEClean, 0 ) ), m_critBucketGlobal( CLockBasicInfo( CSyncBasicInfo( szBucketGlobal ), rankBucketGlobal, 0 ) ), #ifdef VERPERF diff --git a/dev/ese/src/eseutil/_edbutil.hxx b/dev/ese/src/eseutil/_edbutil.hxx index 8c7e633b..be0c87b5 100644 --- a/dev/ese/src/eseutil/_edbutil.hxx +++ b/dev/ese/src/eseutil/_edbutil.hxx @@ -5,13 +5,6 @@ #include #include #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, 
someone else owns that.") -#include -#pragma prefast(pop) #include "os.hxx" diff --git a/dev/ese/src/eseutil/dbspacedump.cxx b/dev/ese/src/eseutil/dbspacedump.cxx index 31cca67e..64864058 100644 --- a/dev/ese/src/eseutil/dbspacedump.cxx +++ b/dev/ese/src/eseutil/dbspacedump.cxx @@ -1466,8 +1466,7 @@ JET_ERR ErrPrintField( if ( pBTStats->pBasicCatalog->pSpaceHints ) { WCHAR wszSpaceHintsGrbits[12]; - const JET_ERR err = ErrOSStrCbFormatW( wszSpaceHintsGrbits, sizeof(wszSpaceHintsGrbits), L"0x%x", pBTStats->pBasicCatalog->pSpaceHints->grbit ); - assert( JET_errSuccess == err ); + OSStrCbFormatW( wszSpaceHintsGrbits, sizeof(wszSpaceHintsGrbits), L"0x%x", pBTStats->pBasicCatalog->pSpaceHints->grbit ); assert( rgSpaceFields[eField].cchFieldSize >= (ULONG)LOSStrLengthW( wszSpaceHintsGrbits ) ); wprintf( L"%ws%ws", WszFillBuffer( L' ', rgSpaceFields[eField].cchFieldSize - LOSStrLengthW( wszSpaceHintsGrbits ) ), wszSpaceHintsGrbits ); } @@ -1784,10 +1783,8 @@ JET_ERR ErrSpaceDumpCtxSetFields( wszTemp[0] = L'\0'; for( ULONG eField = 1; eField < sizeof(rgSpaceFields)/sizeof(rgSpaceFields[0]); eField++ ) { - ULONG ret = StringCbCatW( (WCHAR*)wszTemp, cb, rgSpaceFields[eField].wszField ); - assert( 0 == ret ); - ret = StringCbCatW( (WCHAR*)wszTemp, cb, L"," ); - assert( 0 == ret ); + OSStrCbAppendW( wszTemp, cb, rgSpaceFields[eField].wszField ); + OSStrCbAppendW( wszTemp, cb, L"," ); } wszFields = wszTemp; } @@ -1924,7 +1921,7 @@ JET_ERR ErrSpaceDumpCtxSetOptions( // We only allow 1 char + NUL separators, likely it was wrong. assert( wszSeparator[0] != L'\0' ); assert( wszSeparator[1] == L'\0' ); - if ( S_OK != StringCbCopyW( pespCtx->rgwchSep, sizeof(pespCtx->rgwchSep), wszSeparator ) ) + if ( JET_errSuccess > ErrOSStrCbCopyW( pespCtx->rgwchSep, sizeof(pespCtx->rgwchSep), wszSeparator ) ) { assertSz( fFalse, "Huh?" 
); return ErrERRCheck( JET_errInvalidParameter ); diff --git a/dev/ese/src/eseutil/eseutil.cxx b/dev/ese/src/eseutil/eseutil.cxx index 017d3a23..3af80032 100644 --- a/dev/ese/src/eseutil/eseutil.cxx +++ b/dev/ese/src/eseutil/eseutil.cxx @@ -18,13 +18,6 @@ #include "esefile.hxx" #undef UNICODE // esefile.hxx enables UNICODE -#pragma prefast(push) -#pragma prefast(disable:28196, "Do not bother us with strsafe, someone else owns that.") -#pragma prefast(disable:28205, "Do not bother us with strsafe, someone else owns that.") -#include -#pragma prefast(pop) - - #ifndef ESENT #include #define ESEBCLI2_DLL_NAME L"ESEBCLI2.DLL" @@ -2112,11 +2105,17 @@ LOCAL BOOL FEDBUTLParseRepair( _In_ PCWSTR arg, UTILOPTS *popts ) LOCAL VOID EDBUTLGetBaseName( _In_ PCWSTR const wszLogfile, __out_ecount(4) WCHAR * const wszBaseName ) { WCHAR wszNameT[_MAX_FNAME+1]; + ERR errT; assert( wszBaseName != NULL ); _wsplitpath_s( wszLogfile, NULL, 0, NULL, 0, wszNameT, _countof(wszNameT), NULL, 0); - StringCbCopyW( wszBaseName, 4 * sizeof(WCHAR), wszNameT ); + errT = ErrOSStrCbCopyW( wszBaseName, 4 * sizeof(WCHAR), wszNameT ); + // We're not telling the truth about how big wszBaseName is, we're instead artificially + // shortening it to 4 WCHARs in order to only copy 4 WCHARs from wszNameT. We would + // use OSSStrCbCopyNW(), if it existed. 
The end result is that we might get success + // back and we might get buffer too small, depending on the length of wszNameT + assert( errT == JET_errSuccess || errT == JET_errBufferTooSmall ); } LOCAL BOOL FEDBUTLBaseNameOnly( const WCHAR * const wszName ) @@ -2404,7 +2403,7 @@ LOCAL BOOL FEDBUTLParseDump( _In_ PCWSTR arg, UTILOPTS *popts ) { const size_t cchSzNode = 256; WCHAR wszNode[cchSzNode]; - StringCbCopyW( wszNode, sizeof(wszNode), arg+2 ); + OSStrCbCopyW( wszNode, sizeof(wszNode), arg+2 ); INT dbid = 0; INT pgno = 0; @@ -2480,7 +2479,7 @@ LOCAL BOOL FEDBUTLParseDump( _In_ PCWSTR arg, UTILOPTS *popts ) { const size_t cchSzLogRange = 256; WCHAR wszLogRange[cchSzLogRange]; - StringCbCopyW( wszLogRange, sizeof( wszLogRange ), arg + 2 ); + OSStrCbCopyW( wszLogRange, sizeof( wszLogRange ), arg + 2 ); LONG lgenStart = 0; LONG lgenEnd = 0; @@ -3408,12 +3407,12 @@ LOCAL JET_ERR ErrEDBUTLBackupAndInstateDB( _wsplitpath_s( popts->wszSourceDB, wszDrive, _countof(wszDrive), wszDir, _countof(wszDir), NULL, 0, NULL, 0 ); _wmakepath_s( wszSourceDB, _countof(wszSourceDB), wszDrive, wszDir, NULL, NULL ); - StringCbCatW( wszSourceDB, sizeof(wszSourceDB), wfd.cFileName ); + OSStrCbAppendW( wszSourceDB, sizeof(wszSourceDB), wfd.cFileName ); FindClose( hFind ); } else { - StringCbCopyW( wszSourceDB, sizeof(wszSourceDB), popts->wszSourceDB ); + OSStrCbCopyW( wszSourceDB, sizeof(wszSourceDB), popts->wszSourceDB ); } // Make backup before instating, if requested. 
@@ -3671,7 +3670,7 @@ ERR ErrPrintESEBCLI2Error ( HRESULT hr, HRESULT hrGLE, HMODULE hESEBCLI2 ) wszFinalMsg = (WCHAR *) LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT, cbFinalMsg ); if ( wszFinalMsg ) { - StringCbPrintfW( wszFinalMsg, cbFinalMsg, (WCHAR *)lpMsgBuf, hrGLE ); + OSStrCbFormatW( wszFinalMsg, cbFinalMsg, (WCHAR *)lpMsgBuf, hrGLE ); LocalFree( lpMsgBuf ); } else @@ -3746,7 +3745,7 @@ WCHAR * WszCopy( const WCHAR * wsz ) if ( ( wszCopy = (WCHAR *) LocalAlloc( LMEM_FIXED | LMEM_ZEROINIT, cb ) ) == NULL ) return(NULL); - StringCbCopyW( wszCopy, cb, wsz ); + OSStrCbCopyW( wszCopy, cb, wsz ); return( wszCopy ); } @@ -3758,7 +3757,7 @@ void FormatErrorInfoString( const JET_ERRINFOBASIC_W * perrinfo ) { Assert( szFormatted[0] == '\0' ); - (void)ErrOSStrCbFormatA( szFormatted, cbFormatted, "err = %d (%ws", err, wszErrStrings ? wszErrStrings : L"unknown, unknown" ); + OSStrCbFormatA( szFormatted, cbFormatted, "err = %d (%ws", err, wszErrStrings ? wszErrStrings : L"unknown, unknown" ); // Truncate off the ", " to get the constant name only. CHAR * const szBeginErrExplanation = strchr( szFormatted, ',' ); Assert( szBeginErrExplanation ); // implies we didn't get the , before we potentially truncated!? 
@@ -3770,7 +3769,7 @@ void FormatErrorInfoString( #ifdef DEBUG if ( perrinfo && perrinfo->lSourceLine != 0 ) { - (void)ErrOSStrCbFormatA( szFormatted + strlen(szFormatted), cbFormatted - strlen(szFormatted), + OSStrCbFormatA( szFormatted + strlen(szFormatted), cbFormatted - strlen(szFormatted), " - %ws:%d", perrinfo->rgszSourceFile, perrinfo->lSourceLine ); } #endif @@ -3918,7 +3917,7 @@ LOCAL VOID DBUTLIDumpRestoreEnv( RESTORE_ENVIRONMENT * pREnv, INT cDesc = 0 ) PrintField( L"", cDesc, NULL ); - StringCbPrintfW( wszBuffer, sizeof( wszBuffer ), L"%d database(s)", pREnv->m_cDatabases ); + OSStrCbFormatW( wszBuffer, sizeof( wszBuffer ), L"%d database(s)", pREnv->m_cDatabases ); PrintField( L"Databases:", cDesc, wszBuffer ); assert ( pREnv->m_wszDatabaseDisplayName || 0 == pREnv->m_cDatabases); @@ -3939,7 +3938,7 @@ LOCAL VOID DBUTLIDumpRestoreEnv( RESTORE_ENVIRONMENT * pREnv, INT cDesc = 0 ) PrintField( L"Database Name:", cDesc, pREnv->m_wszDatabaseDisplayName[iDb] ); // like: 6B29FC40-CA47-1067-B31D-00DD010662DA - StringCbPrintfW( guidStr, sizeof( guidStr ), + OSStrCbFormatW( guidStr, sizeof( guidStr ), L"%08X-%04X-%04X-%08X%08X", guid.Data1, guid.Data2, guid.Data3, *(DWORD *)&guid.Data3,*( 1 + (DWORD *)&guid.Data3 ) ); @@ -3980,7 +3979,7 @@ LOCAL VOID DBUTLIDumpRestoreEnv( RESTORE_ENVIRONMENT * pREnv, INT cDesc = 0 ) // in restore.env at parsing point (HrESERestoreOpenFile) // For now, we will display the Store defaults as this is // the only client using the eseback2/esebcli2 anyway - StringCbPrintfW( wszBuffer, sizeof( wszBuffer ), L"%s%08X.log - %s%08X.log", + OSStrCbFormatW( wszBuffer, sizeof( wszBuffer ), L"%s%08X.log - %s%08X.log", pREnv->m_wszLogBaseName, pREnv->m_ulGenLow, pREnv->m_wszLogBaseName, @@ -3990,7 +3989,7 @@ LOCAL VOID DBUTLIDumpRestoreEnv( RESTORE_ENVIRONMENT * pREnv, INT cDesc = 0 ) { assert ( 0 == pREnv->m_ulGenLow ); assert ( 0 == pREnv->m_ulGenHigh ); - StringCbPrintfW( wszBuffer, sizeof( wszBuffer ), L"no log files restored"); + 
OSStrCbFormatW( wszBuffer, sizeof( wszBuffer ), L"no log files restored"); } PrintField( L"Log files range:", cDesc, wszBuffer ); @@ -4003,7 +4002,7 @@ LOCAL VOID DBUTLIDumpRestoreEnv( RESTORE_ENVIRONMENT * pREnv, INT cDesc = 0 ) PrintField( L"Recover Status:", cDesc, rwszRecoverStatus [ status ] ); - StringCbPrintfW( wszBuffer, sizeof( wszBuffer ), L"0x%08X", pREnv->m_hrLastRecover); + OSStrCbFormatW( wszBuffer, sizeof( wszBuffer ), L"0x%08X", pREnv->m_hrLastRecover); PrintField( L"Recover Error:", cDesc, wszBuffer ); PrintField( L"Recover Time:", cDesc, ( 0 != pREnv->m_timeLastRecover ? _wctime( &pREnv->m_timeLastRecover ) : L"" ) ); @@ -5315,7 +5314,7 @@ LOCAL JET_ERR ErrEDBUTLCheckLogStream( JET_INSTANCE* pInst, UTILOPTS* pOpts ) // file name (with wild card) WCHAR wszFName[ _MAX_PATH + 1 ]; Call( JetGetSystemParameterW( *pInst, JET_sesidNil, JET_paramBaseName, NULL, wszFName, sizeof( wszFName ) ) ); - StringCchCatW( wszFName, _countof( wszFName ), L"*" ); + OSStrCbAppendW( wszFName, sizeof( wszFName ), L"*" ); //================================ // drive and dir @@ -5388,7 +5387,7 @@ void PushEseutilArgTrace( INT argc, __in_ecount(argc) LPWSTR argv[] ) { CHAR * szT = szEseutilCmd; - (void)ErrOSStrCbFormatA( szT, cbAllArgs, "Command: \"" ); // 5 spaces is _perfect_. Excercise to reader as to why. + OSStrCbFormatA( szT, cbAllArgs, "Command: \"" ); // 5 spaces is _perfect_. Excercise to reader as to why. size_t cbUsed = strlen( szT ); szT += cbUsed; cbAllArgs -= cbUsed; @@ -5396,7 +5395,7 @@ void PushEseutilArgTrace( INT argc, __in_ecount(argc) LPWSTR argv[] ) for( INT iarg = 0; iarg < argc; iarg++ ) { // Not sure if MBCS can expand this, but left buffer anyways, may truncate but we'd survive. 
- (void)ErrOSStrCbFormatA( szT, cbAllArgs, "%ws ", argv[iarg] ); + OSStrCbFormatA( szT, cbAllArgs, "%ws ", argv[iarg] ); cbUsed = strlen( szT ); szT += cbUsed; cbAllArgs -= cbUsed; @@ -5404,7 +5403,7 @@ void PushEseutilArgTrace( INT argc, __in_ecount(argc) LPWSTR argv[] ) if ( argc ) { // if we had any args wipe out the last space with the end quote. - (void)ErrOSStrCbFormatA( szT - 1, cbAllArgs + 1, "\"" ); + OSStrCbFormatA( szT - 1, cbAllArgs + 1, "\"" ); } JET_TESTHOOKTRACETESTMARKER mark = { sizeof(mark), szEseutilCmd, 1 }; JetTestHook( opTestHookTraceTestMarker, &mark ); diff --git a/dev/ese/src/inc/_jet.hxx b/dev/ese/src/inc/_jet.hxx index b729e2c3..280f6b42 100644 --- a/dev/ese/src/inc/_jet.hxx +++ b/dev/ese/src/inc/_jet.hxx @@ -462,7 +462,7 @@ typedef CAutoIWSZ< 90 > CAutoWSZPATH; { \ err = (func); \ } \ - EXCEPT( GrbitParam( JET_paramExceptionAction ) != JET_ExceptionNone ? ExceptionFail( _T( "API" ) ) : efaContinueSearch ) \ + EXCEPT( GrbitParam( JET_paramExceptionAction ) != JET_ExceptionNone ? ExceptionFail( "API" ) : efaContinueSearch ) \ { \ AssertPREFIX( !"This code path should be impossible (the exception-handler should have terminated the process)." ); \ err = ErrERRCheck( JET_errInternalError ); \ @@ -490,9 +490,9 @@ typedef CAutoIWSZ< 90 > CAutoWSZPATH; TRY \ { \ err = (func); \ - OSTrace( JET_tracetagAPI, OSFormat( "End %s with error %d (0x%x)", _T( #func ), err, err ) ); \ + OSTrace( JET_tracetagAPI, OSFormat( "End %s with error %d (0x%x)", #func, err, err ) ); \ } \ - EXCEPT( GrbitParam( JET_paramExceptionAction ) != JET_ExceptionNone ? ExceptionFail( _T( #func ) ) : efaContinueSearch ) \ + EXCEPT( GrbitParam( JET_paramExceptionAction ) != JET_ExceptionNone ? ExceptionFail( #func ) : efaContinueSearch ) \ { \ AssertPREFIX( !"This code path should be impossible (the exception-handler should have terminated the process)." 
); \ err = ErrERRCheck( JET_errInternalError ); \ @@ -519,7 +519,7 @@ typedef CAutoIWSZ< 90 > CAutoWSZPATH; OSEventTrace( _etguidApiCall_Start, 1, &ulTraceApiId ); \ CLockDeadlockDetectionInfo::GetApiEntryState(&cDisableDeadlockCheck, &cDisableOwnershipCheck, &cLocks); \ const JET_ERR err = (func); \ - OSTrace( JET_tracetagAPI, OSFormat( "End %s with error %d (0x%x)", _T( #func ), err, err ) ); \ + OSTrace( JET_tracetagAPI, OSFormat( "End %s with error %d (0x%x)", #func, err, err ) ); \ AssertRTL( err > -65536 && err < 65536 ); \ fDisableLockCheck ? CLockDeadlockDetectionInfo::DisableLockCheckOnApiExit() : 0;\ CLockDeadlockDetectionInfo::AssertCleanApiExit(cDisableDeadlockCheck, cDisableOwnershipCheck, cLocks); \ diff --git a/dev/ese/src/inc/_osu/syncu.hxx b/dev/ese/src/inc/_osu/syncu.hxx index c62eeda1..8aadef7f 100644 --- a/dev/ese/src/inc/_osu/syncu.hxx +++ b/dev/ese/src/inc/_osu/syncu.hxx @@ -221,7 +221,7 @@ class CRITPOOL ~CRITPOOL( void ); CRITPOOL& operator=( CRITPOOL& ); // disallowed - BOOL FInit( const LONG cThread, const INT rank, const _TCHAR* szName ); + BOOL FInit( const LONG cThread, const INT rank, const CHAR* szName ); void Term( void ); CCriticalSection& Crit( const T* const pt ); @@ -260,7 +260,7 @@ CRITPOOL::~CRITPOOL( void ) // fTrue on success or fFalse on failure template -BOOL CRITPOOL::FInit( const LONG cThread, const INT rank, const _TCHAR* szName ) +BOOL CRITPOOL::FInit( const LONG cThread, const INT rank, const CHAR* szName ) { // ensure that Term() was called or ErrInit() was never called @@ -360,7 +360,7 @@ class RWLPOOL ~RWLPOOL( void ); RWLPOOL& operator=( RWLPOOL& ); // disallowed - BOOL FInit( const LONG cThread, const LONG cbObjectSize, const INT rank, const _TCHAR* szName ); + BOOL FInit( const LONG cThread, const LONG cbObjectSize, const INT rank, const CHAR* szName ); void Term( void ); CReaderWriterLock& Rwl( const VOID* const pObj ); @@ -398,7 +398,7 @@ INLINE RWLPOOL::~RWLPOOL( void ) // initializes the reader writer lock 
pool for use by cThread threads, returning // fTrue on success or fFalse on failure -INLINE BOOL RWLPOOL::FInit( const LONG cThread, const LONG cbObjectSize, const INT rank, const _TCHAR* szName ) +INLINE BOOL RWLPOOL::FInit( const LONG cThread, const LONG cbObjectSize, const INT rank, const CHAR* szName ) { // ensure that Term() was called or ErrInit() was never called diff --git a/dev/ese/src/inc/cpage.hxx b/dev/ese/src/inc/cpage.hxx index 09b96c11..7e6892cd 100644 --- a/dev/ese/src/inc/cpage.hxx +++ b/dev/ese/src/inc/cpage.hxx @@ -624,7 +624,7 @@ class CPAGE struct PGHDR2; typedef INT (*PFNVISITNODE)( const CPAGE::PGHDR * const ppghdr, INT itag, DWORD fNodeFlags, const KEYDATAFLAGS * const pkdf, void * pvCtx ); - VOID DumpAllocMap_ ( _TCHAR * rgchBuf, CPRINTF * pcprintf ) const; + VOID DumpAllocMap_ ( CHAR * rgchBuf, CPRINTF * pcprintf ) const; ERR DumpAllocMap ( CPRINTF * pcprintf ) const; ERR DumpTags ( CPRINTF * pcprintf, DWORD_PTR dwOffset = 0 ) const; VOID DumpTag ( CPRINTF * pcprintf, const INT itag, const DWORD_PTR dwOffset ) const; diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 66b48946..8c44620d 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -3260,12 +3260,12 @@ INLINE ERR SigToSz( const SIGNATURE * const psig, __out_bcount(cbSigBuffer) PSTR LOGTIME tm = psig->logtimeCreate; const char * szSigFormat = "Create time:%02d/%02d/%04d %02d:%02d:%02d.%3.3d Rand:%lu Computer:%s"; - ErrOSStrCbFormatA( szSigBuffer, cbSigBuffer, szSigFormat, - (SHORT) tm.bMonth, (SHORT) tm.bDay, (SHORT) tm.bYear + 1900, - (SHORT) tm.bHours, (SHORT) tm.bMinutes, (SHORT) tm.bSeconds, - (SHORT) tm.Milliseconds(), - ULONG(psig->le_ulRandom), - psig->szComputerName ); + OSStrCbFormatA( szSigBuffer, cbSigBuffer, szSigFormat, + (SHORT) tm.bMonth, (SHORT) tm.bDay, (SHORT) tm.bYear + 1900, + (SHORT) tm.bHours, (SHORT) tm.bMinutes, (SHORT) tm.bSeconds, + (SHORT) tm.Milliseconds(), + ULONG(psig->le_ulRandom), + psig->szComputerName ); 
return(JET_errSuccess); } @@ -3968,8 +3968,8 @@ public: m_state( stateStart ), m_thread( 0 ), m_errFreeze ( JET_errSuccess ), - m_asigSnapshotThread( CSyncBasicInfo( _T( "asigSnapshotThread" ) ) ), - m_asigSnapshotStarted( CSyncBasicInfo( _T( "asigSnapshotStarted" ) ) ), + m_asigSnapshotThread( CSyncBasicInfo( "asigSnapshotThread" ) ), + m_asigSnapshotStarted( CSyncBasicInfo( "asigSnapshotStarted" ) ), m_fFreezeAllInstances ( fFalse ), m_ipinstCurrent ( 0 ), m_fFlags ( 0 ) @@ -3995,8 +3995,8 @@ private: m_state( stateStart ), m_thread( 0 ), m_errFreeze ( JET_errSuccess ), - m_asigSnapshotThread( CSyncBasicInfo( _T( "asigSnapshotThread" ) ) ), - m_asigSnapshotStarted( CSyncBasicInfo( _T( "asigSnapshotStarted" ) ) ), + m_asigSnapshotThread( CSyncBasicInfo( "asigSnapshotThread" ) ), + m_asigSnapshotStarted( CSyncBasicInfo( "asigSnapshotStarted" ) ), m_fFreezeAllInstances ( fFalse ), m_ipinstCurrent ( 0 ), m_fFlags ( 0 ) diff --git a/dev/ese/src/inc/dataserializer.hxx b/dev/ese/src/inc/dataserializer.hxx index b7d5d7a4..e28538f4 100644 --- a/dev/ese/src/inc/dataserializer.hxx +++ b/dev/ese/src/inc/dataserializer.hxx @@ -158,7 +158,7 @@ public: Assert(ppv); Assert(pcb); *ppv = m_sz; - (VOID) StringCbLengthA(m_sz, m_cchMax, pcb); + *pcb = LOSStrLengthA( m_sz, m_cchMax ); } virtual ERR ErrSetFromPvCb(const void * const pv, const size_t cb) diff --git a/dev/ese/src/inc/esestd.hxx b/dev/ese/src/inc/esestd.hxx index f98e78b6..ddb94878 100644 --- a/dev/ese/src/inc/esestd.hxx +++ b/dev/ese/src/inc/esestd.hxx @@ -8,13 +8,6 @@ #define _CRT_RAND_S #include #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) #include #include 
#include @@ -34,12 +27,6 @@ typedef __nullterminated const wchar_t* PCWSTR; #include using namespace std; -#pragma prefast(push) -#pragma prefast(disable:28196, "Do not bother us with strsafe, someone else owns that.") -#pragma prefast(disable:28205, "Do not bother us with strsafe, someone else owns that.") -#include -#pragma prefast(pop) - // some security macros taken from exwarning.h // Here are three macros which correctly answer the question: "How many BYTEs (TCHARs, WCHARs) diff --git a/dev/ese/src/inc/log.hxx b/dev/ese/src/inc/log.hxx index 9225fe76..27c685a0 100644 --- a/dev/ese/src/inc/log.hxx +++ b/dev/ese/src/inc/log.hxx @@ -446,7 +446,7 @@ UINT CbLGFixedSizeOfRec( const LR * ); VOID AssertLRSizesConsistent(); #endif -ERR ErrLrToLogCsvSimple( CWPRINTFFILE * pcwpfCsvOut, LGPOS lgpos, const LR *plr, LOG * plog ); +ERR ErrLrToLogCsvSimple( CPRINTFFILE * pcpfCsvOutW, LGPOS lgpos, const LR *plr, LOG * plog ); BOOL FLGDebugLogRec( LR *plr ); @@ -1524,7 +1524,7 @@ public: FLAG32 m_fSummary : 1; // output the IO summary at end of log dumps }; }; - CWPRINTFFILE* m_pcwpfCsvOut; // non-NULL indicates do CSV output. + CPRINTFFILE* m_pcpfCsvOutW; // non-NULL indicates do CSV output. 
} LOGDUMP_OP; diff --git a/dev/ese/src/inc/old.hxx b/dev/ese/src/inc/old.hxx index c8a715ee..dc9b31bb 100644 --- a/dev/ese/src/inc/old.hxx +++ b/dev/ese/src/inc/old.hxx @@ -6,7 +6,7 @@ class OLD_STATUS_ // make constructor protected so that this class may // only be used as a base class protected: - OLD_STATUS_() : m_asig( CSyncBasicInfo( _T( "asigOLD" ) ) ) + OLD_STATUS_() : m_asig( CSyncBasicInfo( "asigOLD" ) ) { Reset_(); } public: diff --git a/dev/ese/src/inc/pib.hxx b/dev/ese/src/inc/pib.hxx index 9ab5e678..5be50fd0 100644 --- a/dev/ese/src/inc/pib.hxx +++ b/dev/ese/src/inc/pib.hxx @@ -167,7 +167,7 @@ public: critLogBeginTrx( CLockBasicInfo( CSyncBasicInfo( szPIBLogBeginTrx ), rankPIBLogBeginTrx, 0 ) ), ptlsApi( NULL ), ptlsTrxBeginLast( Ptls() ), - asigWaitLogWrite( CSyncBasicInfo( _T( "PIB::asigWaitLogWrite" ) ) ), + asigWaitLogWrite( CSyncBasicInfo( "PIB::asigWaitLogWrite" ) ), m_level( 0 ), m_critCachePriority( CLockBasicInfo( CSyncBasicInfo( szPIBCachePriority ), rankPIBCachePriority, 0 ) ), m_pctCachePriority( g_pctCachePriorityUnassigned ), diff --git a/dev/ese/src/noncore/blockcache/interop/CCacheConfigurationWrapper.h b/dev/ese/src/noncore/blockcache/interop/CCacheConfigurationWrapper.h index fe2e5f1a..443eebe8 100644 --- a/dev/ese/src/noncore/blockcache/interop/CCacheConfigurationWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CCacheConfigurationWrapper.h @@ -40,7 +40,7 @@ namespace Internal { String^ absPath = I()->Path(); pin_ptr wszAbsPathT = PtrToStringChars( absPath ); - OSStrCbCopyW( wszAbsPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszAbsPathT ); + OSStrCbCopyW( wszAbsPath, cbOSFSAPI_MAX_PATHW, wszAbsPathT ); } QWORD CbMaximumSize() override diff --git a/dev/ese/src/noncore/blockcache/interop/CCachedFileConfigurationWrapper.h b/dev/ese/src/noncore/blockcache/interop/CCachedFileConfigurationWrapper.h index 0da208f8..27e9ae7b 100644 --- a/dev/ese/src/noncore/blockcache/interop/CCachedFileConfigurationWrapper.h +++ 
b/dev/ese/src/noncore/blockcache/interop/CCachedFileConfigurationWrapper.h @@ -41,7 +41,7 @@ namespace Internal { String^ absPath = I()->CachingFilePath(); pin_ptr wszAbsPathT = PtrToStringChars( absPath ); - OSStrCbCopyW( wszAbsPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszAbsPathT ); + OSStrCbCopyW( wszAbsPath, cbOSFSAPI_MAX_PATHW, wszAbsPathT ); } template< class TM, class TN > diff --git a/dev/ese/src/noncore/blockcache/interop/CFileFindWrapper.h b/dev/ese/src/noncore/blockcache/interop/CFileFindWrapper.h index 9150fd78..eb63850d 100644 --- a/dev/ese/src/noncore/blockcache/interop/CFileFindWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CFileFindWrapper.h @@ -76,7 +76,7 @@ namespace Internal ExCall( foundPath = I()->Path() ); pin_ptr wszFoundPath = PtrToStringChars( foundPath ); - Call( ErrOSStrCbCopyW( wszAbsFoundPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszFoundPath ) ); + Call( ErrOSStrCbCopyW( wszAbsFoundPath, cbOSFSAPI_MAX_PATHW, wszFoundPath ) ); HandleError: if ( err < JET_errSuccess ) diff --git a/dev/ese/src/noncore/blockcache/interop/CFileIdentificationWrapper.h b/dev/ese/src/noncore/blockcache/interop/CFileIdentificationWrapper.h index cd7678d5..75f27022 100644 --- a/dev/ese/src/noncore/blockcache/interop/CFileIdentificationWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CFileIdentificationWrapper.h @@ -75,7 +75,7 @@ namespace Internal if ( keyPath != nullptr ) { pin_ptr wszKeyPathT = PtrToStringChars( keyPath ); - Call( ErrOSStrCbCopyW( wszKeyPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszKeyPathT ) ); + Call( ErrOSStrCbCopyW( wszKeyPath, cbOSFSAPI_MAX_PATHW, wszKeyPathT ) ); } HandleError: @@ -102,9 +102,9 @@ namespace Internal ExCall( I()->GetFilePathById( (VolumeId)volumeid, (FileId)(Int64)fileid, anyAbsPath, keyPath ) ); pin_ptr wszAnyAbsPathT = PtrToStringChars( anyAbsPath ); - Call( ErrOSStrCbCopyW( wszAnyAbsPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszAnyAbsPathT ) ); + Call( ErrOSStrCbCopyW( wszAnyAbsPath, 
cbOSFSAPI_MAX_PATHW, wszAnyAbsPathT ) ); pin_ptr wszKeyPathT = PtrToStringChars( keyPath ); - Call( ErrOSStrCbCopyW( wszKeyPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszKeyPathT ) ); + Call( ErrOSStrCbCopyW( wszKeyPath, cbOSFSAPI_MAX_PATHW, wszKeyPathT ) ); HandleError: if ( err < JET_errSuccess ) diff --git a/dev/ese/src/noncore/blockcache/interop/CFileSystemWrapper.h b/dev/ese/src/noncore/blockcache/interop/CFileSystemWrapper.h index d9445069..363728cd 100644 --- a/dev/ese/src/noncore/blockcache/interop/CFileSystemWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CFileSystemWrapper.h @@ -227,7 +227,7 @@ namespace Internal ExCall( absRootPath = I()->PathRoot( gcnew String( wszPath ) ) ); pin_ptr wszAbsRootPathT = PtrToStringChars( absRootPath ); - Call( ErrOSStrCbCopyW( wszAbsRootPath, OSFSAPI_MAX_PATH * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszAbsRootPathT ) ); + Call( ErrOSStrCbCopyW( wszAbsRootPath, OSFSAPI_MAX_PATH * sizeof( WCHAR ), wszAbsRootPathT ) ); HandleError: if ( err < JET_errSuccess ) @@ -255,9 +255,9 @@ namespace Internal diskNumber ); pin_ptr wszVolumeCanonicalPathT = PtrToStringChars( volumeCanonicalPath ); - OSStrCbCopyW( wszVolumeCanonicalPath, cchVolumeCanonicalPath * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszVolumeCanonicalPathT ); + OSStrCbCopyW( wszVolumeCanonicalPath, cchVolumeCanonicalPath * sizeof( WCHAR ), wszVolumeCanonicalPathT ); pin_ptr wszDiskIdT = PtrToStringChars( diskId ); - OSStrCbCopyW( wszDiskId, cchDiskId * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszDiskIdT ); + OSStrCbCopyW( wszDiskId, cchDiskId * sizeof( WCHAR ), wszDiskIdT ); *pdwDiskNumber = diskNumber; } @@ -273,7 +273,7 @@ namespace Internal ExCall( absPath = I()->PathComplete( gcnew String( wszPath ) ) ); pin_ptr wszAbsPathT = PtrToStringChars( absPath ); - Call( ErrOSStrCbCopyW( wszAbsPath, OSFSAPI_MAX_PATH * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszAbsPathT ) ); + Call( ErrOSStrCbCopyW( wszAbsPath, OSFSAPI_MAX_PATH * sizeof( WCHAR ), wszAbsPathT ) ); HandleError: if ( err < 
JET_errSuccess ) @@ -312,17 +312,17 @@ namespace Internal if ( wszFolder && folder != nullptr ) { pin_ptr wszFolderT = PtrToStringChars( folder ); - Call( ErrOSStrCbCopyW( wszFolder, OSFSAPI_MAX_PATH * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszFolderT ) ); + Call( ErrOSStrCbCopyW( wszFolder, OSFSAPI_MAX_PATH * sizeof( WCHAR ), wszFolderT ) ); } if ( wszFileBase && fileBase != nullptr ) { pin_ptr wszFileBaseT = PtrToStringChars( fileBase ); - Call( ErrOSStrCbCopyW( wszFileBase, OSFSAPI_MAX_PATH * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszFileBaseT ) ); + Call( ErrOSStrCbCopyW( wszFileBase, OSFSAPI_MAX_PATH * sizeof( WCHAR ), wszFileBaseT ) ); } if ( wszFileExt && fileExt != nullptr ) { pin_ptr wszFileExtT = PtrToStringChars( fileExt ); - Call( ErrOSStrCbCopyW( wszFileExt, OSFSAPI_MAX_PATH * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszFileExtT ) ); + Call( ErrOSStrCbCopyW( wszFileExt, OSFSAPI_MAX_PATH * sizeof( WCHAR ), wszFileExtT ) ); } HandleError: @@ -386,7 +386,7 @@ namespace Internal gcnew String( wszFileExt ) ) ); pin_ptr wszPathT = PtrToStringChars( path ); - Call( ErrOSStrCbCopyW( wszPath, cbPath, (STRSAFE_LPCWSTR)wszPathT ) ); + Call( ErrOSStrCbCopyW( wszPath, cbPath, wszPathT ) ); HandleError: if ( err < JET_errSuccess ) @@ -410,7 +410,7 @@ namespace Internal ExCall( folderOut = I()->PathFolderNorm( folderIn ) ); pin_ptr wszFolderT = PtrToStringChars( folderOut ); - Call( ErrOSStrCbCopyW( wszFolder, cbSize, (STRSAFE_LPCWSTR)wszFolderT ) ); + Call( ErrOSStrCbCopyW( wszFolder, cbSize, wszFolderT ) ); HandleError: return err; @@ -469,7 +469,7 @@ namespace Internal ExCall( folder = I()->PathFolderDefault( fCanProcessUseRelativePaths ) ); pin_ptr wszFolderT = PtrToStringChars( folder ); - Call( ErrOSStrCbCopyW( wszFolder, cbFolder, (STRSAFE_LPCWSTR)wszFolderT ) ); + Call( ErrOSStrCbCopyW( wszFolder, cbFolder, wszFolderT ) ); *pfCanProcessUseRelativePaths = fCanProcessUseRelativePaths ? 
fTrue : fFalse; HandleError: @@ -499,7 +499,7 @@ namespace Internal ExCall( folder = I()->GetTempFolder() ); pin_ptr wszFolderT = PtrToStringChars( folder ); - Call( ErrOSStrCbCopyW( wszFolder, cchFolder * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszFolderT ) ); + Call( ErrOSStrCbCopyW( wszFolder, cchFolder * sizeof( WCHAR ), wszFolderT ) ); HandleError: if ( err < JET_errSuccess ) @@ -525,7 +525,7 @@ namespace Internal ExCall( filename = I()->GetTempFileName( gcnew String( wszFolder ), gcnew String( wszPrefix ) ) ); pin_ptr wszFileNameT = PtrToStringChars( filename ); - Call( ErrOSStrCbCopyW( wszFileName, OSFSAPI_MAX_PATH * sizeof( WCHAR ), (STRSAFE_LPCWSTR)wszFileNameT ) ); + Call( ErrOSStrCbCopyW( wszFileName, OSFSAPI_MAX_PATH * sizeof( WCHAR ), wszFileNameT ) ); HandleError: if ( err < JET_errSuccess ) diff --git a/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h b/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h index 476bf6cb..3f594038 100644 --- a/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h @@ -151,7 +151,7 @@ namespace Internal ExCall( absPath = I()->Path() ); pin_ptr wszAbsPathT = PtrToStringChars( absPath ); - Call( ErrOSStrCbCopyW( wszAbsPath, cbOSFSAPI_MAX_PATHW, (STRSAFE_LPCWSTR)wszAbsPathT ) ); + Call( ErrOSStrCbCopyW( wszAbsPath, cbOSFSAPI_MAX_PATHW, wszAbsPathT ) ); HandleError: if ( err < JET_errSuccess ) diff --git a/dev/ese/src/noncore/blockcache/interop/Common.h b/dev/ese/src/noncore/blockcache/interop/Common.h index 137ed589..a1d881b0 100644 --- a/dev/ese/src/noncore/blockcache/interop/Common.h +++ b/dev/ese/src/noncore/blockcache/interop/Common.h @@ -24,8 +24,6 @@ #include - -#include #include "os.hxx" #include "tcconst.hxx" diff --git a/dev/ese/src/noncore/blockcache/interop/FileSystemBase.h b/dev/ese/src/noncore/blockcache/interop/FileSystemBase.h index 44f58ca9..537bb5fd 100644 --- a/dev/ese/src/noncore/blockcache/interop/FileSystemBase.h +++ 
b/dev/ese/src/noncore/blockcache/interop/FileSystemBase.h @@ -257,7 +257,7 @@ namespace Internal if ( path ) { pin_ptr wszPathT = PtrToStringChars( path ); - Call( ErrOSStrCbCopyW( wszPathStorage, _cbrg( wszPathStorage ), (STRSAFE_LPCWSTR)wszPathT ) ); + Call( ErrOSStrCbCopyW( wszPathStorage, _cbrg( wszPathStorage ), wszPathT ) ); wszPath = wszPathStorage; } @@ -295,7 +295,7 @@ namespace Internal WCHAR wszFolder[ OSFSAPI_MAX_PATH ] = { 0 }; pin_ptr wszFolderT = PtrToStringChars( folder ); - Call( ErrOSStrCbCopyW( wszFolder, _cbrg( wszFolder ), (STRSAFE_LPCWSTR)wszFolderT ) ); + Call( ErrOSStrCbCopyW( wszFolder, _cbrg( wszFolder ), wszFolderT ) ); Call( Pi->ErrPathFolderNorm( wszFolder, _cbrg( wszFolder ) ) ); diff --git a/dev/ese/src/noncore/eseshadow/esewriter.hxx b/dev/ese/src/noncore/eseshadow/esewriter.hxx index 11987579..a976ebaa 100644 --- a/dev/ese/src/noncore/eseshadow/esewriter.hxx +++ b/dev/ese/src/noncore/eseshadow/esewriter.hxx @@ -6,14 +6,6 @@ #ifndef _ESEWRITER_ #define _ESEWRITER_ -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) - #include #include "_jethdr.h" #include "os.hxx" diff --git a/dev/ese/src/noncore/interop/jetinterop.cpp b/dev/ese/src/noncore/interop/jetinterop.cpp index a01bf8b8..28b89616 100644 --- a/dev/ese/src/noncore/interop/jetinterop.cpp +++ b/dev/ese/src/noncore/interop/jetinterop.cpp @@ -9,6 +9,8 @@ #define _UNICODE #endif #else +// We never really compile without JET_UNICODE, right? 
+#error 1 #undef UNICODE #undef _UNICODE #endif @@ -17,7 +19,6 @@ #include #include #include -#include #if (ESENT_WINXP) #undef JET_VERSION @@ -2461,7 +2462,7 @@ namespace Isam MJET_INDEXCREATE ic; ic.IndexName = MakeManagedString( _indexCreate.szIndexName ); - ic.Key = MakeManagedString( _indexCreate.szKey, (_indexCreate.cbKey / sizeof(_TCHAR)) - 1 ); // -1 to keep just 1 null-terminator. the next conversion back to an unmanaged string will result in 2 null-terminators (managed strings arent null-terminated) + ic.Key = MakeManagedString( _indexCreate.szKey, (_indexCreate.cbKey / sizeof(wchar_t)) - 1 ); // -1 to keep just 1 null-terminator. the next conversion back to an unmanaged string will result in 2 null-terminators (managed strings arent null-terminated) ic.KeyLengthMax = _indexCreate.cbKeyMost; ic.Grbit = (MJET_GRBIT) _indexCreate.grbit; ic.Density = _indexCreate.ulDensity; @@ -3360,7 +3361,7 @@ namespace Isam virtual MJET_INSTANCE MJetCreateInstance(String^ name) { ::JET_INSTANCE _inst = 0; - _TCHAR * _szInstanceName = 0; + wchar_t * _szInstanceName = 0; try { @@ -3384,8 +3385,8 @@ namespace Isam { ::JET_INSTANCE _inst = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; - _TCHAR * _szInstanceName = 0; - _TCHAR * _szDisplayName = 0; + wchar_t * _szInstanceName = 0; + wchar_t * _szDisplayName = 0; try { @@ -3479,7 +3480,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); unsigned long _paramid = (unsigned long) paramid; JET_API_PTR _plParam = (JET_API_PTR)param; - _TCHAR * _sz = 0; + wchar_t * _sz = 0; try { @@ -3544,7 +3545,7 @@ namespace Isam unsigned long _paramid = (unsigned long) paramid; JET_API_PTR _plParam = 0; const int _cbMax = 1024; - _TCHAR _sz[_cbMax]; + wchar_t _sz[_cbMax]; Call( ::JetGetSystemParameter( _inst, _sesid, _paramid, &_plParam, _sz, _cbMax ) ); param = (System::Int64) _plParam; @@ -3773,8 +3774,8 @@ namespace Isam { ::JET_INSTANCE _inst = GetUnmanagedInst( instance ); ::JET_SESID _sesid; - _TCHAR * _szUserName = 0; - 
_TCHAR * _szPassword = 0; + wchar_t * _szUserName = 0; + wchar_t * _szPassword = 0; try { _szUserName = GetUnmanagedString(user); @@ -3856,8 +3857,8 @@ namespace Isam MJET_WRN% wrn) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szFilename = 0; - _TCHAR * _szConnect = 0; + wchar_t * _szFilename = 0; + wchar_t * _szConnect = 0; ::JET_DBID _dbid; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; @@ -3904,7 +3905,7 @@ namespace Isam MJET_WRN% wrn) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szFilename = 0; + wchar_t * _szFilename = 0; const unsigned long _cpgMax = (unsigned long)maxPages; ::JET_DBID _dbid; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; @@ -3929,7 +3930,7 @@ namespace Isam MJET_GRBIT grbit) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szFilename = 0; + wchar_t * _szFilename = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; MJET_WRN wrn; try @@ -3960,7 +3961,7 @@ namespace Isam MJET_GRBIT grbit) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szFilename = 0; + wchar_t * _szFilename = 0; const unsigned long _cpgMax = (unsigned long)maxPages; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; MJET_WRN wrn; @@ -3981,7 +3982,7 @@ namespace Isam virtual MJET_WRN MJetDetachDatabase(MJET_SESID sesid, String^ file) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szFilename = 0; + wchar_t * _szFilename = 0; MJET_WRN wrn; try @@ -4004,7 +4005,7 @@ namespace Isam MJET_GRBIT grbit) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szFilename = 0; + wchar_t * _szFilename = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; MJET_WRN wrn; try @@ -4040,7 +4041,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR _szName[JET_cbNameMost+1]; + wchar_t _szName[JET_cbNameMost+1]; memset( _szName, 0, sizeof( _szName ) ); @@ -4081,7 +4082,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( 
tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR _szName[JET_cbNameMost+1]; + wchar_t _szName[JET_cbNameMost+1]; memset( _szName, 0, sizeof( _szName ) ); @@ -4126,7 +4127,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_OBJECTINFO _objectinfo; - _TCHAR * _szObject = 0; + wchar_t * _szObject = 0; _objectinfo.cbStruct = sizeof( _objectinfo ); @@ -4155,8 +4156,8 @@ namespace Isam ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_OBJECTINFO _objectinfo; ::JET_OBJTYP _objtyp = ( ::JET_OBJTYP ) objtyp; - _TCHAR * _szContainer = 0; - _TCHAR * _szObject = 0; + wchar_t * _szContainer = 0; + wchar_t * _szObject = 0; _objectinfo.cbStruct = sizeof( _objectinfo ); @@ -4217,7 +4218,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_OBJTYP _objtyp = ( ::JET_OBJTYP ) objtyp; - _TCHAR * _szContainer = 0; + wchar_t * _szContainer = 0; ::JET_OBJECTLIST _objectlist; _objectlist.cbStruct = sizeof( _objectlist ); @@ -4246,8 +4247,8 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_OBJTYP _objtyp = ( ::JET_OBJTYP ) objtyp; - _TCHAR * _szContainer = 0; - _TCHAR * _szObject = 0; + wchar_t * _szContainer = 0; + wchar_t * _szObject = 0; ::JET_OBJECTLIST _objectlist; _objectlist.cbStruct = sizeof( _objectlist ); @@ -4360,7 +4361,7 @@ namespace Isam virtual MJET_LOGINFOMISC MJetGetLogFileInfo(String^ logfile) { ::JET_LOGINFOMISC2 _loginfomisc; - _TCHAR * _szLogfile = 0; + wchar_t * _szLogfile = 0; try { _szLogfile = GetUnmanagedString(logfile); @@ -4416,7 +4417,7 @@ namespace Isam virtual MJET_DBINFO MJetGetDatabaseFileInfo(String^ database) { ::JET_DBINFOMISC _dbinfomisc; - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; try { _szDatabase = GetUnmanagedString(database); @@ -4436,7 +4437,7 @@ namespace Isam virtual MJET_DBINFO 
MJetGetDatabaseFileInfo(String^ database) { ::JET_DBINFOMISC4 _dbinfomisc4; - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; try { _szDatabase = GetUnmanagedString(database); @@ -4455,7 +4456,7 @@ namespace Isam virtual MJET_DBINFO MJetGetDatabaseFileInfo(String^ database) { ::JET_DBINFOMISC7 _dbinfomisc7; - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; try { _szDatabase = GetUnmanagedString(database); @@ -4594,7 +4595,7 @@ namespace Isam MJET_GRBIT grbit) { ::JET_INSTANCE _instance = GetUnmanagedInst( instance ); - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; unsigned long _genFirstDivergedLog = (unsigned long) genFirstDivergedLog; ::JET_GRBIT _grbit = (JET_GRBIT) grbit; @@ -4621,7 +4622,7 @@ namespace Isam MJET_GRBIT grbit) { ::JET_INSTANCE _instance = GetUnmanagedInst( instance ); - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; unsigned long _genMinRequired = (unsigned long) genMinRequired; unsigned long _genFirstDivergedLog = (unsigned long) genFirstDivergedLog; unsigned long _genMaxRequired = (unsigned long) genMaxRequired; @@ -4649,7 +4650,7 @@ namespace Isam MJET_GRBIT grbit) { ::JET_INSTANCE _instance = GetUnmanagedInst( instance ); - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; unsigned long _pgnoStart = (unsigned long) pgnoStart; unsigned long _cpg = (unsigned long) cpg; ::JET_GRBIT _grbit = (JET_GRBIT) grbit; @@ -4685,7 +4686,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szTableName = 0; + wchar_t * _szTableName = 0; unsigned long _lPages = pages; unsigned long _lDensity = density; ::JET_TABLEID _tableid; @@ -4768,7 +4769,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szTableName = 0; + wchar_t * _szTableName = 0; try { _szTableName = GetUnmanagedString(name); @@ -4790,8 +4791,8 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); 
::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szName = 0; - _TCHAR * _szNameNew = 0; + wchar_t * _szName = 0; + wchar_t * _szNameNew = 0; try { @@ -4819,7 +4820,7 @@ namespace Isam _columnbase.cbStruct = sizeof( _columnbase ); - Call( JetGetTableColumnInfo( _sesid, _tableid, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); + Call( JetGetTableColumnInfo( _sesid, _tableid, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); return MakeManagedColumnbase( _columnbase ); } #endif @@ -4835,7 +4836,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_COLUMNBASE _columnbase; - _TCHAR * _szTable = 0; + wchar_t * _szTable = 0; ::JET_COLUMNID _columnid = ::JET_COLUMNID( colid ); _columnbase.cbStruct = sizeof( _columnbase ); @@ -4847,7 +4848,7 @@ namespace Isam { _szTable = GetUnmanagedString( table ); - Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoByColid ) ); + Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoByColid ) ); MJET_COLUMNID columnid = MakeManagedColumnid( _columnid, (MJET_COLTYP)_columnbase.coltyp, ( 1252 == _columnbase.cp )); columnName = MJetGetColumnName( sesid, dbid, table, columnid ); } @@ -4863,7 +4864,7 @@ namespace Isam try { _szTable = GetUnmanagedString( table ); - Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); + Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); } __finally { @@ -4884,7 +4885,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_COLUMNBASE _columnbase; - _TCHAR * _szTable = 0; + wchar_t * _szTable = 
0; ::JET_COLUMNID _columnid = GetUnmanagedColumnid( columnid ); _columnbase.cbStruct = sizeof( _columnbase ); @@ -4894,7 +4895,7 @@ namespace Isam try { _szTable = GetUnmanagedString( table ); - Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoByColid ) ); + Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoByColid ) ); columnName = MJetGetColumnName( sesid, dbid, table, columnid ); } __finally @@ -4909,7 +4910,7 @@ namespace Isam try { _szTable = GetUnmanagedString( table ); - Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); + Call( ::JetGetColumnInfo( _sesid, _dbid, _szTable, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); } __finally { @@ -4932,7 +4933,7 @@ namespace Isam _columnbase.cbStruct = sizeof( _columnbase ); - Call( JetGetTableColumnInfo( _sesid, _tableid, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); + Call( JetGetTableColumnInfo( _sesid, _tableid, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); return MakeManagedColumnbase( _columnbase ); } #endif @@ -4944,7 +4945,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); ::JET_COLUMNBASE _columnbase; - _TCHAR * _szColumn = 0; + wchar_t * _szColumn = 0; _columnbase.cbStruct = sizeof( _columnbase ); @@ -4971,8 +4972,8 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_COLUMNBASE _columnbase; - _TCHAR * _szTable = 0; - _TCHAR * _szColumn = 0; + wchar_t * _szTable = 0; + wchar_t * _szColumn = 0; _columnbase.cbStruct = sizeof( _columnbase ); @@ -5002,7 +5003,7 @@ namespace Isam ::JET_DBID _dbid = GetUnmanagedDbid( dbid 
); ::JET_GRBIT _grbit = sortByColumnid ? JET_ColInfoListSortColumnid : JET_ColInfoList; ::JET_COLUMNLIST _columnlist; - _TCHAR * _szTable = 0; + wchar_t * _szTable = 0; _columnlist.cbStruct = sizeof( _columnlist ); @@ -5050,7 +5051,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szColumnName = 0; + wchar_t * _szColumnName = 0; ::JET_COLUMNDEF _columndef = GetUnmanagedColumndef( definition ); ::JET_COLUMNID _columnid; void * _pvDefault = 0; @@ -5088,7 +5089,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szColumnName = 0; + wchar_t * _szColumnName = 0; try { @@ -5110,7 +5111,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szColumnName = 0; + wchar_t * _szColumnName = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; try { @@ -5133,8 +5134,8 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szName = 0; - _TCHAR * _szNameNew = 0; + wchar_t * _szName = 0; + wchar_t * _szNameNew = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; try @@ -5163,8 +5164,8 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; - _TCHAR * _szTableName = 0; - _TCHAR * _szColumnName = 0; + wchar_t * _szTableName = 0; + wchar_t * _szColumnName = 0; void * _pvDefault = 0; int _cbDefault; @@ -5191,7 +5192,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); ::JET_INDEXLIST _indexlist; - _TCHAR * _szIndex = 0; + wchar_t * _szIndex = 0; _indexlist.cbStruct = sizeof( _indexlist ); @@ -5218,8 +5219,8 @@ namespace Isam ::JET_SESID _sesid = 
GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_INDEXLIST _indexlist; - _TCHAR * _szTable = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szTable = 0; + wchar_t * _szIndex = 0; _indexlist.cbStruct = sizeof( _indexlist ); @@ -5261,7 +5262,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::JET_INDEXLIST _indexlist; - _TCHAR * _szTable = 0; + wchar_t * _szTable = 0; _indexlist.cbStruct = sizeof( _indexlist ); @@ -5284,7 +5285,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); unsigned long _ulDensity = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szIndex = 0; try { @@ -5309,8 +5310,8 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); unsigned long _ulDensity = 0; - _TCHAR * _szTable = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szTable = 0; + wchar_t * _szIndex = 0; try { @@ -5333,7 +5334,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); ::LCID _lcid; - _TCHAR * _szIndex = 0; + wchar_t * _szIndex = 0; try { @@ -5358,8 +5359,8 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); ::LCID _lcid; - _TCHAR * _szTable = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szTable = 0; + wchar_t * _szIndex = 0; try { @@ -5401,7 +5402,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); unsigned short _cbKeyMost = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szIndex = 0; try { @@ -5426,8 +5427,8 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); unsigned short _cbKeyMost = 0; - _TCHAR * _szTable = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szTable = 0; + wchar_t * _szIndex = 0; try { @@ 
-5452,7 +5453,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szIndex = 0; + wchar_t * _szIndex = 0; ULONG _cbData = 4*1024; void * _pvData = NULL; ::JET_ERR _err = JET_errSuccess; @@ -5501,8 +5502,8 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szTable = 0; - _TCHAR * _szIndex = 0; + wchar_t * _szTable = 0; + wchar_t * _szIndex = 0; ULONG _cbData = 4*1024; void * _pvData = NULL; ::JET_ERR _err = JET_errSuccess; @@ -5556,9 +5557,9 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; - _TCHAR * _szName = 0; - _TCHAR * _szKey = 0; - unsigned long _cbKey = (key->Length + 1) * sizeof(_TCHAR); + wchar_t * _szName = 0; + wchar_t * _szKey = 0; + unsigned long _cbKey = (key->Length + 1) * sizeof(wchar_t); unsigned long _lDensity = density; try @@ -5643,7 +5644,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szName = 0; + wchar_t * _szName = 0; try { @@ -5796,7 +5797,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); const int _cwchBuffer = 1024; - _TCHAR _rgwchBuffer[_cwchBuffer]; + wchar_t _rgwchBuffer[_cwchBuffer]; Call( ::JetGetDatabaseInfo( _sesid, _dbid, _rgwchBuffer, sizeof( _rgwchBuffer ), JET_DbInfoFilename ) ); return MakeManagedString( _rgwchBuffer ); @@ -5827,8 +5828,8 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; - _TCHAR * _szFilename = 0; - _TCHAR * _szConnect = 0; + wchar_t * _szFilename = 0; + wchar_t * _szConnect = 0; ::JET_DBID _dbid; try @@ -5905,7 +5906,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); 
::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szName = 0; + wchar_t * _szName = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; ::JET_TABLEID _tableid; void * _pvParameters = 0; @@ -5956,7 +5957,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szName = 0; + wchar_t * _szName = 0; ::JET_TABLEID _tableid; tableid = *(new MJET_TABLEID()); @@ -6479,12 +6480,12 @@ namespace Isam ::JET_COLUMNBASE _columnbase = { sizeof( ::JET_COLUMNBASE ) }; #if (ESENT_WINXP) - Call( ::JetGetTableColumnInfo( _sesid, _tableid, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoByColid ) ); + Call( ::JetGetTableColumnInfo( _sesid, _tableid, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoByColid ) ); columns[ _iEnumColumn ].Columnid = MakeManagedColumnid( _columnid, (MJET_COLTYP)_columnbase.coltyp, ( 1252 == _columnbase.cp ) ); columns[ _iEnumColumn ].ColumnName = MJetGetColumnName( tableid, columns[ _iEnumColumn ].Columnid ); #else - Call( ::JetGetTableColumnInfo( _sesid, _tableid, (_TCHAR *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); + Call( ::JetGetTableColumnInfo( _sesid, _tableid, (wchar_t *)&_columnid, &_columnbase, sizeof( _columnbase ), JET_ColInfoBaseByColid ) ); columns[ _iEnumColumn ].Columnid = MakeManagedColumnid( _columnbase.columnid, (MJET_COLTYP)_columnbase.coltyp, ( 1252 == _columnbase.cp ) ); columns[ _iEnumColumn ].ColumnName = MakeManagedString( _columnbase.szBaseColumnName ); @@ -6752,7 +6753,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR _szName[JET_cbNameMost+1]; + wchar_t _szName[JET_cbNameMost+1]; Call( ::JetGetCurrentIndex( _sesid, _tableid, _szName, _countof( _szName ) ) ); String ^ name = MakeManagedString( _szName ); @@ -6765,7 +6766,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( 
tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); - _TCHAR * _szIndexName = 0; + wchar_t * _szIndexName = 0; try { @@ -6788,7 +6789,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; - _TCHAR * _szIndexName = 0; + wchar_t * _szIndexName = 0; try { @@ -6812,7 +6813,7 @@ namespace Isam ::JET_SESID _sesid = GetUnmanagedSesid( tableid.Sesid ); ::JET_TABLEID _tableid = GetUnmanagedTableid( tableid ); ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; - _TCHAR * _szIndexName = 0; + wchar_t * _szIndexName = 0; unsigned long _itagSequence = (unsigned long)itag; try @@ -6833,7 +6834,7 @@ namespace Isam JET_ERR JET_API MJetSetCurrentIndex4( JET_SESID sesid, JET_TABLEID tableid, - const _TCHAR *szIndexName, + const wchar_t *szIndexName, JET_INDEXID *pindexid, JET_GRBIT grbit, unsigned long itagSequence ); @@ -7198,8 +7199,8 @@ namespace Isam JET_ERR JET_API MJetCompact( JET_SESID sesid, - const _TCHAR *szDatabaseSrc, - const _TCHAR *szDatabaseDest, + const wchar_t *szDatabaseSrc, + const wchar_t *szDatabaseDest, JET_PFNSTATUS pfnStatus, JET_CONVERT *pconvert, JET_GRBIT grbit ); @@ -7218,7 +7219,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szTableName = 0; + wchar_t * _szTableName = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; unsigned long _cPasses = passes; unsigned long _cSeconds = seconds; @@ -7254,7 +7255,7 @@ namespace Isam { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); ::JET_DBID _dbid = GetUnmanagedDbid( dbid ); - _TCHAR * _szTableName = 0; + wchar_t * _szTableName = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; unsigned long _cPasses = passes; unsigned long _cSeconds = seconds; @@ -7307,8 +7308,8 @@ namespace Isam JET_ERR JET_API MJetDefragment3( JET_SESID vsesid, - const _TCHAR *szDatabaseName, - const _TCHAR *szTableName, + const 
wchar_t *szDatabaseName, + const wchar_t *szTableName, unsigned long *pcPasses, unsigned long *pcSeconds, JET_CALLBACK callback, @@ -7367,8 +7368,8 @@ namespace Isam MJET_GRBIT grbit) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szDbFilename = 0; - _TCHAR * _szSLVFilename = 0; + wchar_t * _szDbFilename = 0; + wchar_t * _szSLVFilename = 0; ::JET_GRBIT _grbit = ( ::JET_GRBIT ) grbit; try @@ -7430,7 +7431,7 @@ namespace Isam Int64 pages) { ::JET_SESID _sesid = GetUnmanagedSesid( sesid ); - _TCHAR * _szDatabase = 0; + wchar_t * _szDatabase = 0; unsigned long _cpg = (unsigned long)pages;; unsigned long _cpgReal; @@ -7816,18 +7817,18 @@ namespace Isam /* UNDONE - JET_ERR JET_API MJetBackup( const _TCHAR *szBackupPath, JET_GRBIT grbit, JET_PFNSTATUS pfnStatus ); + JET_ERR JET_API MJetBackup( const wchar_t *szBackupPath, JET_GRBIT grbit, JET_PFNSTATUS pfnStatus ); JET_ERR JET_API MJetBackupInstance( JET_INSTANCE instance, - const _TCHAR *szBackupPath, + const wchar_t *szBackupPath, JET_GRBIT grbit, JET_PFNSTATUS pfnStatus ); - JET_ERR JET_API MJetRestore(const _TCHAR *sz, JET_PFNSTATUS pfn ); - JET_ERR JET_API MJetRestore2(const _TCHAR *sz, const _TCHAR *szDest, JET_PFNSTATUS pfn ); + JET_ERR JET_API MJetRestore(const wchar_t *sz, JET_PFNSTATUS pfn ); + JET_ERR JET_API MJetRestore2(const wchar_t *sz, const wchar_t *szDest, JET_PFNSTATUS pfn ); JET_ERR JET_API MJetRestoreInstance( JET_INSTANCE instance, - const _TCHAR *sz, - const _TCHAR *szDest, + const wchar_t *sz, + const wchar_t *szDest, JET_PFNSTATUS pfn ); */ @@ -7919,20 +7920,20 @@ namespace Isam unsigned long cbMax, unsigned long *pcbActual ); - JET_ERR JET_API MJetOpenFile( const _TCHAR *szFilename, + JET_ERR JET_API MJetOpenFile( const wchar_t *szFilename, JET_HANDLE *phfFile, unsigned long *pulFileSizeLow, unsigned long *pulFileSizeHigh ); JET_ERR JET_API MJetOpenFileInstance( JET_INSTANCE instance, - const _TCHAR *szFilename, + const wchar_t *szFilename, JET_HANDLE *phfFile, unsigned long 
*pulFileSizeLow, unsigned long *pulFileSizeHigh ); JET_ERR JET_API MJetOpenFileSectionInstance( JET_INSTANCE instance, - _TCHAR *szFile, + wchar_t *szFile, JET_HANDLE *phFile, long iSection, long cSections, @@ -7989,24 +7990,24 @@ namespace Isam JET_ERR JET_API MJetEndExternalBackupInstance2( JET_INSTANCE instance, JET_GRBIT grbit ); - JET_ERR JET_API MJetExternalRestore( _TCHAR *szCheckpointFilePath, - _TCHAR *szLogPath, + JET_ERR JET_API MJetExternalRestore( wchar_t *szCheckpointFilePath, + wchar_t *szLogPath, JET_RSTMAP *rgstmap, long crstfilemap, - _TCHAR *szBackupLogPath, + wchar_t *szBackupLogPath, long genLow, long genHigh, JET_PFNSTATUS pfn ); - JET_ERR JET_API MJetExternalRestore2( _TCHAR *szCheckpointFilePath, - _TCHAR *szLogPath, + JET_ERR JET_API MJetExternalRestore2( wchar_t *szCheckpointFilePath, + wchar_t *szLogPath, JET_RSTMAP *rgstmap, long crstfilemap, - _TCHAR *szBackupLogPath, + wchar_t *szBackupLogPath, JET_LOGINFO * pLogInfo, - _TCHAR *szTargetInstanceName, - _TCHAR *szTargetInstanceLogPath, - _TCHAR *szTargetInstanceCheckpointPath, + wchar_t *szTargetInstanceName, + wchar_t *szTargetInstanceLogPath, + wchar_t *szTargetInstanceCheckpointPath, JET_PFNSTATUS pfn ); JET_ERR JET_API MJetRegisterCallback( @@ -8600,7 +8601,7 @@ namespace Isam _indexcreate.cbStruct = sizeof( _indexcreate ); _indexcreate.szIndexName = GetUnmanagedString( indexcreate.IndexName ); _indexcreate.szKey = GetUnmanagedString( indexcreate.Key ); - _indexcreate.cbKey = ( unsigned long )( indexcreate.Key->Length + 1 ) * sizeof(_TCHAR); + _indexcreate.cbKey = ( unsigned long )( indexcreate.Key->Length + 1 ) * sizeof(wchar_t); _indexcreate.grbit = ( ::JET_GRBIT ) indexcreate.Grbit; _indexcreate.ulDensity = ( unsigned long )indexcreate.Density; _indexcreate.err = 0; @@ -8707,7 +8708,7 @@ namespace Isam _indexcreate.cbStruct = sizeof( _indexcreate ); _indexcreate.szIndexName = GetUnmanagedString( indexcreate.IndexName ); _indexcreate.szKey = GetUnmanagedString( indexcreate.Key ); - 
_indexcreate.cbKey = ( unsigned long )( indexcreate.Key->Length + 1 ) * sizeof(_TCHAR); + _indexcreate.cbKey = ( unsigned long )( indexcreate.Key->Length + 1 ) * sizeof(wchar_t); _indexcreate.grbit = ( ::JET_GRBIT ) indexcreate.Grbit; _indexcreate.ulDensity = ( unsigned long )indexcreate.Density; _indexcreate.err = 0; @@ -9097,8 +9098,8 @@ namespace Isam { FreeUnmanagedString( _dbutil.szDatabase ); FreeUnmanagedString( _dbutil.szBackup ); - FreeUnmanagedString( (_TCHAR *)_dbutil.szTable ); - FreeUnmanagedString( (_TCHAR *)_dbutil.szIndex ); + FreeUnmanagedString( (wchar_t *)_dbutil.szTable ); + FreeUnmanagedString( (wchar_t *)_dbutil.szIndex ); FreeUnmanagedString( _dbutil.szIntegPrefix ); } @@ -11589,7 +11590,7 @@ namespace Isam JET_ERR JET_API MJetSetCurrentIndex4( JET_SESID sesid, JET_TABLEID tableid, - const _TCHAR *szIndexName, + const wchar_t *szIndexName, JET_INDEXID *pindexid, JET_GRBIT grbit, unsigned long itagSequence ); @@ -11794,8 +11795,8 @@ namespace Isam JET_ERR JET_API MJetCompact( JET_SESID sesid, - const _TCHAR *szDatabaseSrc, - const _TCHAR *szDatabaseDest, + const wchar_t *szDatabaseSrc, + const wchar_t *szDatabaseDest, JET_PFNSTATUS pfnStatus, JET_CONVERT *pconvert, JET_GRBIT grbit ); @@ -11836,8 +11837,8 @@ namespace Isam JET_ERR JET_API MJetDefragment3( JET_SESID vsesid, - const _TCHAR *szDatabaseName, - const _TCHAR *szTableName, + const wchar_t *szDatabaseName, + const wchar_t *szTableName, unsigned long *pcPasses, unsigned long *pcSeconds, JET_CALLBACK callback, @@ -12093,18 +12094,18 @@ namespace Isam /* UNDONE - JET_ERR JET_API MJetBackup( const _TCHAR *szBackupPath, JET_GRBIT grbit, JET_PFNSTATUS pfnStatus ); + JET_ERR JET_API MJetBackup( const wchar_t *szBackupPath, JET_GRBIT grbit, JET_PFNSTATUS pfnStatus ); JET_ERR JET_API MJetBackupInstance( JET_INSTANCE instance, - const _TCHAR *szBackupPath, + const wchar_t *szBackupPath, JET_GRBIT grbit, JET_PFNSTATUS pfnStatus ); - JET_ERR JET_API MJetRestore(const _TCHAR *sz, JET_PFNSTATUS pfn 
); - JET_ERR JET_API MJetRestore2(const _TCHAR *sz, const _TCHAR *szDest, JET_PFNSTATUS pfn ); + JET_ERR JET_API MJetRestore(const wchar_t *sz, JET_PFNSTATUS pfn ); + JET_ERR JET_API MJetRestore2(const wchar_t *sz, const wchar_t *szDest, JET_PFNSTATUS pfn ); JET_ERR JET_API MJetRestoreInstance( JET_INSTANCE instance, - const _TCHAR *sz, - const _TCHAR *szDest, + const wchar_t *sz, + const wchar_t *szDest, JET_PFNSTATUS pfn ); */ @@ -12167,20 +12168,20 @@ namespace Isam unsigned long cbMax, unsigned long *pcbActual ); - JET_ERR JET_API MJetOpenFile( const _TCHAR *szFilename, + JET_ERR JET_API MJetOpenFile( const wchar_t *szFilename, JET_HANDLE *phfFile, unsigned long *pulFileSizeLow, unsigned long *pulFileSizeHigh ); JET_ERR JET_API MJetOpenFileInstance( JET_INSTANCE instance, - const _TCHAR *szFilename, + const wchar_t *szFilename, JET_HANDLE *phfFile, unsigned long *pulFileSizeLow, unsigned long *pulFileSizeHigh ); JET_ERR JET_API MJetOpenFileSectionInstance( JET_INSTANCE instance, - _TCHAR *szFile, + wchar_t *szFile, JET_HANDLE *phFile, long iSection, long cSections, @@ -12237,24 +12238,24 @@ namespace Isam JET_ERR JET_API MJetEndExternalBackupInstance2( JET_INSTANCE instance, JET_GRBIT grbit ); - JET_ERR JET_API MJetExternalRestore( _TCHAR *szCheckpointFilePath, - _TCHAR *szLogPath, + JET_ERR JET_API MJetExternalRestore( wchar_t *szCheckpointFilePath, + wchar_t *szLogPath, JET_RSTMAP *rgstmap, long crstfilemap, - _TCHAR *szBackupLogPath, + wchar_t *szBackupLogPath, long genLow, long genHigh, JET_PFNSTATUS pfn ); - JET_ERR JET_API MJetExternalRestore2( _TCHAR *szCheckpointFilePath, - _TCHAR *szLogPath, + JET_ERR JET_API MJetExternalRestore2( wchar_t *szCheckpointFilePath, + wchar_t *szLogPath, JET_RSTMAP *rgstmap, long crstfilemap, - _TCHAR *szBackupLogPath, + wchar_t *szBackupLogPath, JET_LOGINFO * pLogInfo, - _TCHAR *szTargetInstanceName, - _TCHAR *szTargetInstanceLogPath, - _TCHAR *szTargetInstanceCheckpointPath, + wchar_t *szTargetInstanceName, + wchar_t 
*szTargetInstanceLogPath, + wchar_t *szTargetInstanceCheckpointPath, JET_PFNSTATUS pfn ); JET_ERR JET_API MJetRegisterCallback( diff --git a/dev/ese/src/os/_osfile.hxx b/dev/ese/src/os/_osfile.hxx index 2e81a8da..c80b6e43 100644 --- a/dev/ese/src/os/_osfile.hxx +++ b/dev/ese/src/os/_osfile.hxx @@ -306,7 +306,7 @@ class COSFile // osf public: CIOComplete() - : m_signal( CSyncBasicInfo( _T( "CIOComplete::m_signal" ) ) ), + : m_signal( CSyncBasicInfo( "CIOComplete::m_signal" ) ), m_err( JET_errSuccess ), m_tidWait( DwUtilThreadId() ), m_keyIOComplete( NULL ), diff --git a/dev/ese/src/os/_ostls.hxx b/dev/ese/src/os/_ostls.hxx index ae53ef82..f2e9e8b8 100644 --- a/dev/ese/src/os/_ostls.hxx +++ b/dev/ese/src/os/_ostls.hxx @@ -52,7 +52,7 @@ struct OSTLS // other properties - const _TCHAR* szCprintfPrefix; + const CHAR* szCprintfPrefix; ULONG dwCurPerfObj; // perf object for which data is currently being collected diff --git a/dev/ese/src/os/cprintf.cxx b/dev/ese/src/os/cprintf.cxx index 4ede2e59..b8846fc9 100644 --- a/dev/ese/src/os/cprintf.cxx +++ b/dev/ese/src/os/cprintf.cxx @@ -34,7 +34,7 @@ void __cdecl CPRINTFDBGOUT::operator()( const CHAR* szFormat, ... ) va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintfA( rgchBuf, cchBuf, szFormat, arg_ptr ); + OSStrCbVFormatA( rgchBuf, cchBuf, szFormat, arg_ptr ); va_end( arg_ptr ); // output the string to the debugger @@ -42,32 +42,135 @@ void __cdecl CPRINTFDBGOUT::operator()( const CHAR* szFormat, ... 
) OutputDebugString( rgchBuf ); } -CPRINTFFILE::CPRINTFFILE( const WCHAR* wszFile ) + +CPRINTFFILE::CPRINTFFILE( const WCHAR* wszFile, CPRINTFFILE::FILEENCODING feEncodingType ) { + DWORD dwWin32Err; + BOOL fNewFile = fFalse; + // open the file for append - + m_hFile = INVALID_HANDLE_VALUE; m_hMutex = NULL; + m_errLast = JET_errInvalidParameter; + + switch ( feEncodingType ) + { + case FILEENCODING::ASCII: + case FILEENCODING::UTF16: + m_feEncodingType = feEncodingType; + break; + + default: + // Default any unrecognized encoding type to ASCII. + m_feEncodingType = FILEENCODING::ASCII; + Assert( fFalse ); + break; + } - if ( ( m_hFile = (void*)CreateFileW( wszFile, GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ) ) == INVALID_HANDLE_VALUE ) + if ( NULL == wszFile ) { return; } + + m_hFile = ( void* )CreateFileW( + wszFile, + GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, + OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, + NULL ); + + dwWin32Err = GetLastError(); + + if ( m_hFile == INVALID_HANDLE_VALUE ) + { + m_errLast = ErrOSErrFromWin32Err( dwWin32Err ); + return; + } + + switch ( dwWin32Err ) + { + case ERROR_SUCCESS: + fNewFile = fTrue; + break; + + case ERROR_ALREADY_EXISTS: + // Opened existing file. + fNewFile = fFalse; + break; + + default: + // What error did we get where we also got a valid file handle? We're going to + // deal with it as a successful fresh file, which is what we've historically done. + Assert( fFalse ); + fNewFile = fTrue; + break; + } + SetHandleInformation( HANDLE( m_hFile ), HANDLE_FLAG_PROTECT_FROM_CLOSE, HANDLE_FLAG_PROTECT_FROM_CLOSE ); - if ( !( m_hMutex = (void*)CreateMutexW( NULL, FALSE, NULL ) ) ) + + m_hMutex = ( void* )CreateMutexW( NULL, FALSE, NULL ); + + dwWin32Err = GetLastError(); + + if ( m_hMutex == NULL ) { + m_errLast = ErrOSErrFromWin32Err( dwWin32Err ); + if ( m_errLast == JET_errSuccess ) + { + // Force an error into errLast so we don't write to the file. 
+ m_errLast = JET_errInvalidParameter; + } + SetHandleInformation( HANDLE( m_hFile ), HANDLE_FLAG_PROTECT_FROM_CLOSE, 0 ); CloseHandle( HANDLE( m_hFile ) ); m_hFile = INVALID_HANDLE_VALUE; return; } + SetHandleInformation( HANDLE( m_hMutex ), HANDLE_FLAG_PROTECT_FROM_CLOSE, HANDLE_FLAG_PROTECT_FROM_CLOSE ); + m_errLast = JET_errSuccess; + + // If we created this file fresh and we're writing Unicode, we need to push in the Unicode byte order mark. + BYTE rgbBOMUTF16[] = { 0xFF, 0xFE }; // UTF-16little endian byte order mark. + switch ( m_feEncodingType ) + { + case FILEENCODING::UTF16: + if ( fNewFile ) + { + this->PutBytesInFile_( rgbBOMUTF16, _countof(rgbBOMUTF16) ); + } + // else + // { + // We could verify the BOM in an existing Unicode file. + // } + break; + + case FILEENCODING::ASCII: + // if ( fNewFile ) + // { + // No BOM. + // } + // else + // { + // We could verify the lack of BOM in an existing ASCII file. + // } + break; + + default: + Assert( fFalse ); // Not possible. + break; + } } CPRINTFFILE::~CPRINTFFILE() { // close the file + m_errLast = JET_errInvalidParameter; + if ( m_hMutex ) { SetHandleInformation( HANDLE( m_hMutex ), HANDLE_FLAG_PROTECT_FROM_CLOSE, 0 ); @@ -84,169 +187,177 @@ CPRINTFFILE::~CPRINTFFILE() } // ================================================================ -void __cdecl CPRINTFFILE::operator()( const CHAR* szFormat, ... ) +void __cdecl CPRINTFFILE::VerifyOnlyDOSTextFileLineReturns_( PCWSTR wsz ) // ================================================================ -{ - if ( HANDLE( m_hFile ) != INVALID_HANDLE_VALUE ) +{ + // UNDONE: Maybe put in osfile, as we should be doing this in other places. 
+ for ( PCWSTR wszT = wcschr( wsz, L'\n' ); wszT; wszT = wcschr( wszT, L'\n' ) ) { - const size_t cchBuf = 1024; - CHAR rgchBuf[ cchBuf ]; - - // print into a temp buffer, truncating the string if too large - - va_list arg_ptr; - va_start( arg_ptr, szFormat ); - StringCbVPrintfA( rgchBuf, cchBuf, szFormat, arg_ptr ); - va_end( arg_ptr ); - - // append the string to the file - - WaitForSingleObject( HANDLE( m_hMutex ), INFINITE ); - - const LARGE_INTEGER ibOffset = { 0, 0 }; - SetFilePointerEx( HANDLE( m_hFile ), ibOffset, NULL, FILE_END ); + if ( ( wszT == wsz ) // this would mean rgchBuf[0] == L'\n', so that's bad. + || + (( wszT + 1 > wsz ) && + ( *( wszT-1 ) ) != L'\r' ) ){ + AssertSz( fFalse, "We've detected someone trying to print a \\n to a file, only \\r\\n is supported as line return!" ); + } + wszT++; // presumes NULL terminated to avoid running off end. + } +} +// ================================================================ +void __cdecl CPRINTFFILE::PutBytesInFile_( BYTE *pb, ULONG cb ) +// ================================================================ +{ + if ( WAIT_OBJECT_0 == WaitForSingleObject( HANDLE( m_hMutex ), INFINITE ) ) + { DWORD cbWritten; - WriteFile( HANDLE( m_hFile ), rgchBuf, (ULONG)(_tcslen( rgchBuf ) * sizeof( _TCHAR )), &cbWritten, NULL ); + const LARGE_INTEGER ibOffset = { 0, 0 }; + // Stop writing after first error + if ( !SetFilePointerEx( HANDLE( m_hFile ), ibOffset, NULL, FILE_END ) ) + { + m_errLast = ErrOSErrFromWin32Err( GetLastError() ); + if ( m_errLast == JET_errSuccess ) + { + // Force an error into errLast so we don't write to the file. + m_errLast = JET_errInvalidParameter; + } + } + else if ( !WriteFile( + HANDLE( m_hFile ), + pb, + cb, + &cbWritten, + NULL ) ) + { + m_errLast = ErrOSErrFromWin32Err( GetLastError() ); + if ( m_errLast == JET_errSuccess ) + { + // Force an error into errLast so we don't write to the file. 
+ m_errLast = JET_errInvalidParameter; + } + } ReleaseMutex( HANDLE( m_hMutex ) ); } + else + { + // Stop writing after first error + m_errLast = ErrOSErrFromWin32Err( GetLastError() ); + if ( m_errLast == JET_errSuccess ) + { + // Force an error into errLast so we don't write to the file. + m_errLast = JET_errInvalidParameter; + } + } } -CWPRINTFFILE::CWPRINTFFILE( const WCHAR* wszFile ) +// ================================================================ +void __cdecl CPRINTFFILE::operator()( const CHAR* szFormat, ... ) +// ================================================================ { - // open the file for append - - m_hFile = INVALID_HANDLE_VALUE; - m_hMutex = NULL; - m_errLast = JET_errInvalidParameter; - - if ( NULL == wszFile ) + if ( HANDLE( m_hFile ) == INVALID_HANDLE_VALUE ) { return; } - if ( ( m_hFile = (void*)CreateFileW( wszFile, GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ) ) == INVALID_HANDLE_VALUE ) - { - m_errLast = ErrOSErrFromWin32Err(GetLastError()); - return; - } - if ( ERROR_ALREADY_EXISTS != GetLastError() ) + if ( JET_errSuccess != m_errLast ) { - // If we created this file fresh, we need to push in the Unicode byte order mark. - Assert( GetLastError() == ERROR_SUCCESS ); - DWORD cbWritten; - WCHAR rgwchBuf[] = { 0xFEFF }; // little endian byte order mark. 
- if (!WriteFile( HANDLE( m_hFile ), rgwchBuf, (ULONG)sizeof( WCHAR ), &cbWritten, NULL )) - { - m_errLast = ErrOSErrFromWin32Err(GetLastError()); - CloseHandle( HANDLE( m_hFile ) ); - m_hFile = INVALID_HANDLE_VALUE; - return; - } - } - SetHandleInformation( HANDLE( m_hFile ), HANDLE_FLAG_PROTECT_FROM_CLOSE, HANDLE_FLAG_PROTECT_FROM_CLOSE ); - if ( !( m_hMutex = (void*)CreateMutexW( NULL, FALSE, NULL ) ) ) - { - m_errLast = ErrOSErrFromWin32Err(GetLastError()); - SetHandleInformation( HANDLE( m_hFile ), HANDLE_FLAG_PROTECT_FROM_CLOSE, 0 ); - CloseHandle( HANDLE( m_hFile ) ); - m_hFile = INVALID_HANDLE_VALUE; return; } - SetHandleInformation( HANDLE( m_hMutex ), HANDLE_FLAG_PROTECT_FROM_CLOSE, HANDLE_FLAG_PROTECT_FROM_CLOSE ); - m_errLast = JET_errSuccess; -} -CWPRINTFFILE::~CWPRINTFFILE() -{ - // close the file + const size_t cchBuf = 1024; + CHAR rgchBuf[ cchBuf ]; + ULONG cbData; - m_errLast = JET_errInvalidParameter; + // print into a temp buffer, truncating the string if too large - if ( m_hMutex ) + va_list arg_ptr; + va_start( arg_ptr, szFormat ); + m_errLast = ErrOSStrCbVFormatA( rgchBuf, sizeof( rgchBuf ), szFormat, arg_ptr ); + va_end( arg_ptr ); + + if ( JET_errSuccess != m_errLast ) { - SetHandleInformation( HANDLE( m_hMutex ), HANDLE_FLAG_PROTECT_FROM_CLOSE, 0 ); - CloseHandle( HANDLE( m_hMutex ) ); - m_hMutex = NULL; + // Stop writing after first error + return; } - if ( m_hFile != INVALID_HANDLE_VALUE ) + cbData = LOSStrLengthA( rgchBuf ); + + switch ( m_feEncodingType ) { - SetHandleInformation( HANDLE( m_hFile ), HANDLE_FLAG_PROTECT_FROM_CLOSE, 0 ); - CloseHandle( HANDLE( m_hFile ) ); - m_hFile = INVALID_HANDLE_VALUE; + case FILEENCODING::UTF16: + // A Unicode file, but you're using ASCII printing. We can do that + // simply by calling the WCHAR () operator. Also. Why are you doing + // this? Use Unicode printing. 
+ Expected( fFalse ); + operator()( L"%hs", rgchBuf ); + break; + + case FILEENCODING::ASCII: + // append the string to the file + this->PutBytesInFile_( ( BYTE * )rgchBuf, cbData ); + break; + + default: + Assert( fFalse ); // Not possible, constructor validates this member. + break; } } -// If _UNICODE is defined, then we only want a single function. -// If _UNICODE is not defined, we need two different functions. -#ifndef _UNICODE // ================================================================ -void __cdecl CWPRINTFFILE::operator()( const _TCHAR* szFormat, ... ) +void __cdecl CPRINTFFILE::operator()( const WCHAR* wszFormat, ... ) // ================================================================ { - AssertSz( fFalse, "NYI" ); - // removed basically the same body as CPRINTFFILE::operator(), b/c didn't want to - // upconvert to WCHAR. -} -#endif + if ( HANDLE( m_hFile ) == INVALID_HANDLE_VALUE ) + { + return; + } -// ================================================================ -void __cdecl CWPRINTFFILE::operator()( const WCHAR* wszFormat, ... 
) -// ================================================================ -{ - if ( HANDLE( m_hFile ) != INVALID_HANDLE_VALUE - && JET_errSuccess == m_errLast ) + if ( JET_errSuccess != m_errLast ) { - const size_t cchBuf = 1024; - WCHAR rgwchBuf[ cchBuf ]; // 2k on the stack, sheesh + return; + } - // print into a temp buffer, truncating the string if too large + const size_t cchBuf = 1024; + WCHAR rgwchBuf[ cchBuf ]; // 2k on the stack, sheesh + ULONG cbData; - va_list arg_ptr; - va_start( arg_ptr, wszFormat ); - m_errLast = ErrFromStrsafeHr( StringCbVPrintfW( rgwchBuf, sizeof(rgwchBuf), wszFormat, arg_ptr )); - if (JET_errSuccess != m_errLast) - { - // Stop writing after first error - return; - } - va_end( arg_ptr ); + // print into a temp buffer, truncating the string if too large -#if DBG - // UNDONE: Move this to something like VerifyOnlyDOSTextFileLineReturns() , maybe put in osfile, - // as we should be doing this in other places. - for (WCHAR * wszT = wcschr(rgwchBuf, L'\n'); wszT; wszT = wcschr(wszT, L'\n') ) - { - if ( (wszT == rgwchBuf) // this would mean rgchBuf[0] == L'\n', so that's bad. - || - ((wszT + 1 > rgwchBuf) && - (*(wszT-1)) != L'\r') ){ - AssertSz( fFalse, "We've detected someone trying to print a \\n to a file, only \\r\\n is supported as line return!" ); - } - wszT++; // presumes NUL terminated to avoid running off end. 
- } -#endif + va_list arg_ptr; + va_start( arg_ptr, wszFormat ); + m_errLast = ErrOSStrCbVFormatW( rgwchBuf, sizeof( rgwchBuf ), wszFormat, arg_ptr ); + va_end( arg_ptr ); - // append the string to the file - if (WAIT_OBJECT_0 == WaitForSingleObject( HANDLE( m_hMutex ), INFINITE )) - { - DWORD cbWritten; - const LARGE_INTEGER ibOffset = { 0, 0 }; - if ( (!SetFilePointerEx( HANDLE( m_hFile ), ibOffset, NULL, FILE_END )) - || (!WriteFile( HANDLE( m_hFile ), rgwchBuf, (ULONG)(LOSStrLengthW( rgwchBuf ) * sizeof( WCHAR )), &cbWritten, NULL ))) - { - // Stop writing after first error - m_errLast = ErrOSErrFromWin32Err(GetLastError()); - } - ReleaseMutex( HANDLE( m_hMutex ) ); - } - else - { - // Stop writing after first error - m_errLast = ErrOSErrFromWin32Err(GetLastError()); - } + if ( JET_errSuccess != m_errLast ) + { + // Stop writing after first error + return; + } + + + cbData = LOSStrLengthW( rgwchBuf ) * sizeof( WCHAR ); + + switch ( m_feEncodingType ) + { + case FILEENCODING::UTF16: +#ifdef DEBUG + this->VerifyOnlyDOSTextFileLineReturns_( rgwchBuf ); +#endif + this->PutBytesInFile_( ( BYTE * )rgwchBuf, cbData ); + break; + + case FILEENCODING::ASCII: + // An ASCII file, but you're using Unicode printing. We can do that + // simply by calling the CHAR () operator (the format literal must be narrow; a wide one re-enters this overload forever), + // but it's potentially expensive and lossy. + Expected( fFalse ); + operator()( "%ls", rgwchBuf ); + break; + + default: + Assert( fFalse ); // Not possible, constructor validates this member. + break; } } @@ -285,7 +396,7 @@ void __cdecl CPRINTFTLSPREFIX::operator()( const CHAR* szFormat, ... ) va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintfA( pchBuf, cchBuf - ( pchBuf - rgchBuf ), szFormat, arg_ptr ); + OSStrCbVFormatA( pchBuf, cchBuf - ( pchBuf - rgchBuf ), szFormat, arg_ptr ); va_end( arg_ptr ); // output the string to the next lower level @@ -293,7 +404,7 @@ void __cdecl CPRINTFTLSPREFIX::operator()( const CHAR* szFormat, ... 
) (*m_pcprintf)( "%s", rgchBuf ); } -void CPRINTF::SetThreadPrintfPrefix( _In_ const _TCHAR * szPrefix ) +void CPRINTF::SetThreadPrintfPrefix( _In_ const CHAR * szPrefix ) { Postls()->szCprintfPrefix = szPrefix; } diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index bebc312d..c81f26bc 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -2,7 +2,6 @@ // Licensed under the MIT License. #include "osstd.hxx" - // we use LoadLibrary in here to test loading our own DLL for EDBGLoad #undef LoadLibraryExW @@ -29,6 +28,11 @@ DEBUG_EXT( name ) VOID name( const PDEBUG_CLIENT pdebugClient, const INT #pragma pop_macro( "Alloc" ) // Allows easier porting from the older wdbgexts-style extensions. +HRESULT +EDBGPrintf( + _In_ PCSTR szFormat, + ... +); #define dprintf EDBGPrintf #ifdef DEBUGGER_EXTENSION @@ -303,13 +307,6 @@ typedef CLRUKResourceUtilityManager<2,DWORD,0,DWORD> CLRUKResourceUtilityMana const INT argc, \ const CHAR * const argv[] ) -HRESULT -DPrintf( - _In_ PCSTR szFormat, - ... -) -; - LOCAL BOOL FFetchGlobalParamsArray( _Deref_out_ CJetParam** prgparam, _Out_ size_t* pcparam ); @@ -16539,17 +16536,17 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con { if ( FFetchVariable( (BYTE *)kdf.key.prefix.Pv() + dwOffset, &rgbPrefix, kdf.key.prefix.Cb() ) ) { - (*pcprintf)( _T( "Prefix (%d bytes):%c" ), kdf.key.prefix.Cb(), ( kdf.key.prefix.Cb() > 16 ? '\n' : ' ' ) ); + (*pcprintf)( "Prefix (%d bytes):%c", kdf.key.prefix.Cb(), ( kdf.key.prefix.Cb() > 16 ? 
'\n' : ' ' ) ); EDBGDumpRawData( pcprintf, rgbPrefix, kdf.key.prefix.Cb(), fFalse ); } else { - (*pcprintf)( _T( "Error: Failed fetching node prefix.\n" ) ); + (*pcprintf)( "Error: Failed fetching node prefix.\n" ); } } else { - (*pcprintf)( _T( "Prefix: \n" ) ); + (*pcprintf)( "Prefix: \n" ); } // fetch and dump suffix, if any @@ -16558,17 +16555,17 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con { if ( FFetchVariable( (BYTE *)kdf.key.suffix.Pv() + dwOffset, &rgbSuffix, kdf.key.suffix.Cb() ) ) { - (*pcprintf)( _T( "Suffix (%d bytes):%c" ), kdf.key.suffix.Cb(), ( kdf.key.suffix.Cb() > 16 ? '\n' : ' ' ) ); + (*pcprintf)( "Suffix (%d bytes):%c", kdf.key.suffix.Cb(), ( kdf.key.suffix.Cb() > 16 ? '\n' : ' ' ) ); EDBGDumpRawData( pcprintf, rgbSuffix, kdf.key.suffix.Cb(), fFalse ); } else { - (*pcprintf)( _T( "Error: Failed fetching node suffix.\n" ) ); + (*pcprintf)( "Error: Failed fetching node suffix.\n" ); } } else { - (*pcprintf)( _T( "Suffix: \n" ) ); + (*pcprintf)( "Suffix: \n" ); } // only fetch data if not performing key-only dump, @@ -16580,13 +16577,13 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con { if ( !FFetchVariable( (BYTE *)kdf.data.Pv() + dwOffset, &rgbData, kdf.data.Cb() ) ) { - (*pcprintf)( _T( "Error: Failed fetching node data.\n" ) ); + (*pcprintf)( "Error: Failed fetching node data.\n" ); goto HandleError; } } else { - (*pcprintf)( _T( "Data: \n" ) ); + (*pcprintf)( "Data: \n" ); goto HandleError; } } @@ -16599,7 +16596,7 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con if ( !pcpage->FLeafPage() ) { (*pcprintf)( - _T( "Page Pointer: %d (0x%x)\n" ), + "Page Pointer: %d (0x%x)\n", (PGNO)*((LittleEndian*)rgbData), (PGNO)*((LittleEndian*)rgbData) ); fDumpRawData = fFalse; @@ -16613,7 +16610,7 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con if( ErrSPREPAIRValidateSpaceNode( &kdf, &pgnoLast, &cpgExtent, 
&wszPoolName ) >= JET_errSuccess ) { (*pcprintf)( - _T( "Space Data (%d bytes): Pool:%ws, cpg:%d, page range:%d-%d\n" ), + "Space Data (%d bytes): Pool:%ws, cpg:%d, page range:%d-%d\n", kdf.data.Cb(), wszPoolName, cpgExtent, @@ -16622,19 +16619,19 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con } else { - (*pcprintf)( _T( "Space Data (%d bytes): \n" ), kdf.data.Cb() ); + (*pcprintf)( "Space Data (%d bytes): \n", kdf.data.Cb() ); fDumpRawData = fTrue; } } else if ( pcpage->FLongValuePage() ) { - (*pcprintf)( _T( "Long-Value Data (%d bytes):\n" ), kdf.data.Cb() ); + (*pcprintf)( "Long-Value Data (%d bytes):\n", kdf.data.Cb() ); EDBGDumpRawData( pcprintf, rgbData, kdf.data.Cb(), fTrue ); fDumpRawData = fFalse; } else if ( pcpage->FIndexPage() ) { - (*pcprintf)( _T( "Primary Bookmark (%d bytes):%c" ), kdf.data.Cb(), ( kdf.data.Cb() > 16 ? '\n' : ' ' ) ); + (*pcprintf)( "Primary Bookmark (%d bytes):%c", kdf.data.Cb(), ( kdf.data.Cb() > 16 ? '\n' : ' ' ) ); EDBGDumpRawData( pcprintf, rgbData, kdf.data.Cb(), fFalse ); fDumpRawData = fFalse; } @@ -16669,7 +16666,7 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con dprintf( "WARNING: Could not retrieve table metadata on pfcb = %p, so will be missing some column data.\n", Pdls()->PfcbCurrentTableDebuggee() ); } - (*pcprintf)( _T( "Data Record (%d bytes):\n"), kdf.data.Cb() ); + (*pcprintf)( "Data Record (%d bytes):\n", kdf.data.Cb() ); // Note: pfcbTable ? pfucbSchemaOnly : NULL is _correct_. We are just using the pfucbSchemaOnly to // pass the FCB really, as that's what DBUTLDumpRec() expects. 
@@ -16683,12 +16680,12 @@ LOCAL VOID EDBGDumpNodeInfo( CPRINTF * pcprintf, const CPAGE * const pcpage, con // if ( fDumpRawData ) { - (*pcprintf)( _T( "Raw Data (%d bytes):\n"), kdf.data.Cb() ); + (*pcprintf)( "Raw Data (%d bytes):\n", kdf.data.Cb() ); EDBGDumpRawData( pcprintf, rgbData, kdf.data.Cb(), fTrue ); } HandleError: - (*pcprintf)( _T( "\n" ) ); + (*pcprintf)( "\n" ); Unfetch( rgbPrefix ); Unfetch( rgbSuffix ); Unfetch( rgbData ); @@ -21261,7 +21258,7 @@ HRESULT CALLBACK ese( DEBUGGER_LOCAL_STORE::DlsDestroy(); LocalFree( pv ); } - EXCEPT( fDebugMode ? ExceptionFail( _T( "ESE Debugger Extension" ) ) : efaContinueSearch ) + EXCEPT( fDebugMode ? ExceptionFail( "ESE Debugger Extension" ) : efaContinueSearch ) { DEBUGGER_LOCAL_STORE::DlsDestroy(); AssertPREFIX( !"This code path should be impossible (the exception-handler should have terminated the process)." ); diff --git a/dev/ese/src/os/encrypt.cxx b/dev/ese/src/os/encrypt.cxx index caa71140..4a36510b 100644 --- a/dev/ese/src/os/encrypt.cxx +++ b/dev/ese/src/os/encrypt.cxx @@ -185,7 +185,7 @@ Crc32Checksum( HCRYPTPROV g_hAESProv = NULL; -CCriticalSection g_critAESProv( CLockBasicInfo( CSyncBasicInfo( _T( "g_critAESProv" ) ), rankAESProv, 0 ) ); +CCriticalSection g_critAESProv( CLockBasicInfo( CSyncBasicInfo( "g_critAESProv" ), rankAESProv, 0 ) ); #define BlockSizeAes256 16 BOOL FOSEncryptionPreinit() diff --git a/dev/ese/src/os/error.cxx b/dev/ese/src/os/error.cxx index e7400e33..660bb1fd 100644 --- a/dev/ese/src/os/error.cxx +++ b/dev/ese/src/os/error.cxx @@ -408,22 +408,24 @@ VOID ERRFormatIssueSource( __out_bcount( cbIssueSource ) WCHAR * wszIssueSource, // WARNING: This code was designed and tested to be 100% resilient to all the worse // parameters and so if you change it, retest as this is retail code. 
- OSStrCbFormatW( wszIssueSource, cbIssueSource, - L"PV: %u.%u.%u.%u SV: %u.%u.%u.%u GLE: %u ERR: %d(%hs%hs:%u): %hs%hs(%u)", + ERR errT = ErrOSStrCbFormatW( wszIssueSource, cbIssueSource, + L"PV: %u.%u.%u.%u SV: %u.%u.%u.%u GLE: %u ERR: %d(%hs%hs:%u): %hs%hs(%u)", #ifndef TEST_ONCE_FORMAT_MAX - DwUtilImageVersionMajor(), DwUtilImageVersionMinor(), DwUtilImageBuildNumberMajor(), DwUtilImageBuildNumberMinor(), - DwUtilSystemVersionMajor(), DwUtilSystemVersionMinor(), DwUtilSystemBuildNumber(), DwUtilSystemServicePackNumber(), - dwSavedGLE, - errLast, szFilenameLastErrPre, szFilenameLastErr, lErrLastLine, - szFilenameSourcePre, szFilenameSource, lLine + DwUtilImageVersionMajor(), DwUtilImageVersionMinor(), DwUtilImageBuildNumberMajor(), DwUtilImageBuildNumberMinor(), + DwUtilSystemVersionMajor(), DwUtilSystemVersionMinor(), DwUtilSystemBuildNumber(), DwUtilSystemServicePackNumber(), + dwSavedGLE, + errLast, szFilenameLastErrPre, szFilenameLastErr, lErrLastLine, + szFilenameSourcePre, szFilenameSource, lLine #else - xWorstDword, xWorstDword, xWorstDword, xWorstDword, - xWorstDword, xWorstDword, xWorstDword, xWorstDword, - xWorstDword, - xWorstInt, szFilenameLastErrPre, szFilenameLastErr, xWorstDword, - szFilenameSourcePre, szFilenameSource, xWorstDword + xWorstDword, xWorstDword, xWorstDword, xWorstDword, + xWorstDword, xWorstDword, xWorstDword, xWorstDword, + xWorstDword, + xWorstInt, szFilenameLastErrPre, szFilenameLastErr, xWorstDword, + szFilenameSourcePre, szFilenameSource, xWorstDword #endif - ); + ); + + CallS( errT ); } @@ -647,7 +649,7 @@ void __stdcall AssertFail( PCSTR szMessageFormat, PCSTR szFilename, LONG lLine, // { - CPRINTFFILE cprintffileAssertTxt( wszAssertFile ); + CPRINTFFILE cprintffileAssertTxt( wszAssertFile, CPRINTFFILE::FILEENCODING::ASCII ); cprintffileAssertTxt( "%ws", g_wszAssertTextFull ); } @@ -1062,7 +1064,7 @@ LOCAL_BROKEN BOOL ExceptionDialog( const WCHAR wszException[] ) const WCHAR wszPidHdr[] = L"PID: "; const WCHAR wszTidHdr[] = 
L", TID: 0x"; - OSStrCbFormatW( wszMessage, sizeof(wszMessage), wszFmt, + (VOID)ErrOSStrCbFormatW( wszMessage, sizeof(wszMessage), wszFmt, wszReleaseHdr, DwUtilImageBuildNumberMajor(), DwUtilImageBuildNumberMinor(), @@ -1216,7 +1218,7 @@ EExceptionFilterAction _ExceptionFail( const CHAR* szMessage, EXCEPTION exceptio // print the exception information and callstack to our assert file { - CPRINTFFILE cprintffileAssert( wszAssertFile ); + CPRINTFFILE cprintffileAssert( wszAssertFile, CPRINTFFILE::FILEENCODING::ASCII ); cprintffileAssert( "JET Exception: Function \"%hs\" raised exception 0x%08X (%ws) at address 0x%0*I64X (base:0x%0*I64X, exr:0x%0*I64X, cxr:0x%0*I64X).", szMessage, diff --git a/dev/ese/src/os/memory.cxx b/dev/ese/src/os/memory.cxx index 24a2af4d..e623ae07 100644 --- a/dev/ese/src/os/memory.cxx +++ b/dev/ese/src/os/memory.cxx @@ -2024,7 +2024,7 @@ VOID SprintHex( { if ( 0 != cbAddress ) { - StringCbPrintfA( sz, cbDest-(sz-szDest), "%*.*lx ", cbAddress, cbAddress, (INT)(pb - rgbSrc) + cbStart ); + OSStrCbFormatA( sz, cbDest-(sz-szDest), "%*.*lx ", cbAddress, cbAddress, (INT)(pb - rgbSrc) + cbStart ); sz += strlen( sz ); } CHAR * szHex = sz; diff --git a/dev/ese/src/os/osblockcache.cxx b/dev/ese/src/os/osblockcache.cxx index ae545b48..06081f1c 100644 --- a/dev/ese/src/os/osblockcache.cxx +++ b/dev/ese/src/os/osblockcache.cxx @@ -272,7 +272,7 @@ void CCachedBlockSlot::DumpFile( _In_ const CCachedBlockSlot& slot, wszAnyAbsPath, wszKeyPath ); - (*pcprintf)( _T( " // file %ws at offset 0x%016I64x for 0x%08x bytes" ), + (*pcprintf)( " // file %ws at offset 0x%016I64x for 0x%08x bytes", wszAnyAbsPath[0] ? 
wszAnyAbsPath : L"", (QWORD)slot.Cbid().Cbno() * cbCachedBlock, cbCachedBlock ); @@ -340,4 +340,4 @@ void OSBlockCachePostterm() CFileWrapper::Cleanup(); CFileFilterWrapper::Cleanup(); CFileFilter::Cleanup(); -} \ No newline at end of file +} diff --git a/dev/ese/src/os/osfile.cxx b/dev/ese/src/os/osfile.cxx index a9a4de36..9462be61 100644 --- a/dev/ese/src/os/osfile.cxx +++ b/dev/ese/src/os/osfile.cxx @@ -10,7 +10,6 @@ #include - //////////////////////////////////////// // Support Functions @@ -394,8 +393,8 @@ COSFile::COSFile() : m_posv( NULL ), m_hFile( INVALID_HANDLE_VALUE ), m_p_osf( NULL ), - m_semChangeFileSize( CSyncBasicInfo( _T( "COSFile::m_semChangeFileSize" ) ) ), - m_critDefer( CLockBasicInfo( CSyncBasicInfo( _T( "COSFile::m_critDefer" ) ), 0, 0 ) ), + m_semChangeFileSize( CSyncBasicInfo( "COSFile::m_semChangeFileSize" ) ), + m_critDefer( CLockBasicInfo( CSyncBasicInfo( "COSFile::m_critDefer" ), 0, 0 ) ), m_fmf( fmfNone ), m_cioUnflushed( 0 ), m_cioFlushing( 0 ), @@ -3190,4 +3189,4 @@ TICK COSFile::DtickIOElapsed( void* const pvIOContext ) const QWORD cmsecIOElapsed = CmsecLatencyOfOSOperation( pioreq ); return (TICK)min( cmsecIOElapsed, dwMax ); -} \ No newline at end of file +} diff --git a/dev/ese/src/os/osfs.cxx b/dev/ese/src/os/osfs.cxx index 8d368dbd..d97ceb8e 100644 --- a/dev/ese/src/os/osfs.cxx +++ b/dev/ese/src/os/osfs.cxx @@ -325,7 +325,7 @@ VOID COSFileSystem::ReportFileErrorWithFilter( COSFileSystem::COSFileSystem( IFileSystemConfiguration * const pfsconfig ) : m_pfsconfig( pfsconfig ), - m_critVolumePathCache( CLockBasicInfo( CSyncBasicInfo( _T( "COSFileSystem::m_critVolumePathCache" ) ), 0, 0 ) ) + m_critVolumePathCache( CLockBasicInfo( CSyncBasicInfo( "COSFileSystem::m_critVolumePathCache" ), 0, 0 ) ) { } diff --git a/dev/ese/src/os/osstd_.hxx b/dev/ese/src/os/osstd_.hxx index b482e398..f3286be7 100644 --- a/dev/ese/src/os/osstd_.hxx +++ b/dev/ese/src/os/osstd_.hxx @@ -9,13 +9,6 @@ #include #include -#pragma prefast(push) -#pragma 
prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) #include #include #include diff --git a/dev/ese/src/os/ostimerqueue.cxx b/dev/ese/src/os/ostimerqueue.cxx index 7d78fbd6..15554ac9 100644 --- a/dev/ese/src/os/ostimerqueue.cxx +++ b/dev/ese/src/os/ostimerqueue.cxx @@ -148,7 +148,7 @@ COSTimerTaskEntry::COSTimerTaskEntry( m_pvTaskGroupContext( (VOID *)pvTaskGroupContext ), m_pvTaskRuntimeContext( NULL ), m_semExec( CSyncBasicInfo( "COSTimerTaskEntry::m_semExec" ) ), - m_critSchedule( CLockBasicInfo( CSyncBasicInfo( _T( "m_critSchedule" ) ), rankTimerTaskEntry, 0 ) ), + m_critSchedule( CLockBasicInfo( CSyncBasicInfo( "m_critSchedule" ), rankTimerTaskEntry, 0 ) ), m_idSchedule( 0 ), m_idRun( 0 ), m_tidExec( DWORD( ~0 ) ), @@ -319,7 +319,7 @@ typedef CInvasiveList< COSTimerTaskEntry, COSTimerTaskEntry::OffsetOfILE > COSTi COSTimerTaskEntryList g_ilTimerTaskList; -CCriticalSection g_critTimerTaskList( CLockBasicInfo( CSyncBasicInfo( _T( "g_critTimerTaskList" ) ), rankTimerTaskList, 0 ) ); +CCriticalSection g_critTimerTaskList( CLockBasicInfo( CSyncBasicInfo( "g_critTimerTaskList" ), rankTimerTaskList, 0 ) ); // creates a task timer object diff --git a/dev/ese/src/os/string.cxx b/dev/ese/src/os/string.cxx index 69f7d5c6..89d2d008 100644 --- a/dev/ese/src/os/string.cxx +++ b/dev/ese/src/os/string.cxx @@ -3,85 +3,173 @@ #include "osstd.hxx" -// Undefine these tokens. They are defined such that you get an error -// when you try to use certain APIs, but this file implements the -// indirect APIs you should use instead, and thus needs access to the -// native APIs. 
-#ifdef wcslen -#undef wcslen -#endif - -#ifdef wcscmp -#undef wcscmp -#endif - -#ifdef wcsncmp -#undef wcsncmp -#endif +#pragma prefast(push) +#pragma prefast(disable:28196, "Do not bother us with strsafe, someone else owns that.") +#pragma prefast(disable:28205, "Do not bother us with strsafe, someone else owns that.") +#include +#pragma prefast(pop) ERR ErrFromStrsafeHr ( HRESULT hr) { - ERR err = (hr == SEC_E_OK) ? - JET_errSuccess : - (hr == STRSAFE_E_INSUFFICIENT_BUFFER) ? - ErrERRCheck(JET_errBufferTooSmall) : - (hr == STRSAFE_E_INVALID_PARAMETER) ? - ErrERRCheck(JET_errInvalidParameter) : - ErrERRCheck(JET_errInternalError); + ERR err; + + switch ( hr ) + { + case SEC_E_OK: + err = JET_errSuccess; + break; + + case STRSAFE_E_INSUFFICIENT_BUFFER: + err = ErrERRCheck( JET_errBufferTooSmall ); + break; + + case STRSAFE_E_INVALID_PARAMETER: + err = ErrERRCheck( JET_errInvalidParameter ); + break; + + default: + err = ErrERRCheck( JET_errInternalError ); + break; + } + CallSx( err, JET_errBufferTooSmall ); // this is the only really expected error return(err); } -// get the length of the string +// For the LOSStrLength* functions: +// Get the length of the string in count of characters. +// +// * Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. +// +// * Because the length is returned as count of characters, the input parameter +// cchMax is also in count of characters. +// +// * If caller does not supply a value for the parameter cchMax, a default is +// used (see string.hxx). The default indicates we expect the string to be +// NULL terminated in a reasonable number of characters. In practice, reasonable +// means fewer characters than the max that StringCchLengthFoo can handle. +// +// * The default can not be the token STRSAFE_MAX_CCH, since that token is defined +// in a Windows specific header file and cannot be referenced in our OS abstraction +// headers. 
The value ulMax is used. +// +// * If the value of the parameter cchMax is greater than STRSAFE_MAX_CCH, STRSAFE_MAX_CCH +// is used. We only expect that to happen if the value is ulMax. +// +// * Obviously, ulMax must be greater than STRSAFE_MAX_CCH. + +static_assert( ulMax > STRSAFE_MAX_CCH ); -LONG LOSStrLengthA( _In_ PCSTR const sz ) +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. +LONG LOSStrLengthA( + _In_ PCSTR const sz, + _In_ ULONG cchMax ) { - // According to Windows OACR, strlen cannot handle NULL. + SIZE_T cchLength; + SIZE_T cchMaxUsed; + HRESULT hr; + + // StringCchLengthA returns an error on a NULL pointer. if ( NULL == sz ) { return 0; } - return strlen( sz ); + if ( cchMax > STRSAFE_MAX_CCH ) + { + Expected( cchMax == ulMax ); + cchMaxUsed = STRSAFE_MAX_CCH; + } + else + { + cchMaxUsed = cchMax; + } + + hr = StringCchLengthA( sz, cchMaxUsed, &cchLength ); + // We never expect this to fail. + Assert( JET_errSuccess == ErrFromStrsafeHr( hr ) ); + + Assert( cchLength <= lMax ); + + return (LONG)cchLength; } -LONG LOSStrLengthW( _In_ PCWSTR const wsz ) + +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. +LONG LOSStrLengthW( + _In_ PCWSTR const wsz, + _In_ ULONG cchMax ) { - // According to Windows OACR, wcslen cannot handle NULL. + SIZE_T cchLength; + SIZE_T cchMaxUsed; + HRESULT hr; + + // StringCchLengthW returns an error on a NULL pointer. if ( NULL == wsz ) { return 0; } - return wcslen( wsz ); + if ( cchMax > STRSAFE_MAX_CCH ) + { + Expected( cchMax == ulMax ); + cchMaxUsed = STRSAFE_MAX_CCH; + } + else + { + cchMaxUsed = cchMax; + } + + hr = StringCchLengthW( wsz, cchMaxUsed, &cchLength ); + // We never expect this to fail. 
+ Assert( !ErrFromStrsafeHr( hr ) ); + + Assert( cchLength <= cchMax ); + + return (LONG)cchLength; } -LONG LOSStrLengthUnalignedW( _In_ const UnalignedLittleEndian< WCHAR > * wsz ) +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. +LONG LOSStrLengthUnalignedW( + _In_ const UnalignedLittleEndian< WCHAR > * wsz, + _In_ ULONG cchMax ) { - LONG cchCurrent = 0; - const UnalignedLittleEndian< WCHAR > * wszCurrent = wsz; + SIZE_T cchLength; + SIZE_T cchMaxUsed; + HRESULT hr; if ( NULL == wsz ) { return 0; } - // Could we do - // if ( 0 == ( wsz % sizeof(WCHAR) ) ) - // or - // if ( isAligned( wsz, WCHAR ) ) - // { - // return wcslen( wsz ); - // } - while ( wszCurrent[ cchCurrent ] != L'\0' ) + if ( cchMax > STRSAFE_MAX_CCH ) { - cchCurrent++; + Expected( cchMax == ulMax ); + cchMaxUsed = STRSAFE_MAX_CCH; } + else + { + cchMaxUsed = cchMax; + } + + hr = UnalignedStringCchLengthW( ( PCWSTR )wsz, cchMaxUsed, &cchLength ); + // We never expect this to fail. + Assert( !ErrFromStrsafeHr( hr ) ); - return cchCurrent; + Assert( cchLength <= cchMax ); + + return (LONG)cchLength; } -LONG LOSStrLengthMW( _In_ PCWSTR const wsz ) +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string length is returned as count of characters. +LONG LOSStrLengthMW( + _In_ PCWSTR const wsz ) { LONG cchCurrent = 0; PCWSTR wszCurrent = wsz; @@ -104,56 +192,65 @@ LONG LOSStrLengthMW( _In_ PCWSTR const wsz ) // That means byte for byte equality. If the first string is "less than" the second string, -1 // is returned. If the strings are "equal", 0 is returned. If the first string is "greater than" // the second string, +1 is returned. -LONG LOSStrCompareA( _In_ PCSTR const szStr1, _In_ PCSTR const szStr2, _In_ const ULONG cchMax ) +// +// Note the unusual usage. Most of our string handling uses count of bytes. 
+// Historically, however, string compare is limited by count of characters. +LONG LOSStrCompareA( + _In_ PCSTR const szStr1, + _In_ PCSTR const szStr2, + _In_ const ULONG cchMax ) { LONG lCmp; + PCSTR szStrUsed1; + PCSTR szStrUsed2; + if ( 0 == cchMax ) { // Why are you doing this? return 0; } - if ( ( NULL == szStr1 ) || ( NULL == szStr2 ) ) + // We treat NULL pointers as 0 length strings. + if ( NULL == szStr1 ) { - // strcmp, strlen, and strncmp don't play well with NULLs. - // NULLs are treated as 0 length strings, and we're sure that - // cchmax is greater than 0, so a non-NULL string is longer. - LONG_PTR lpCmp = (LONG_PTR)szStr1 - (LONG_PTR)szStr2; + szStrUsed1 = ""; + } + else + { + szStrUsed1 = szStr1; + } - if ( lpCmp > 0 ) - { - lCmp = +1; - } - else if ( lpCmp < 0 ) + if ( NULL == szStr2 ) + { + szStrUsed2 = ""; + } + else + { + szStrUsed2 = szStr2; + } + +#if 0 + if ( fIgnoreCase ) + { + if ( cchMax == -1 ) { - lCmp = -1; + lCmp = _stricmp( szStrUsed1, szStrUsed2 ); } else { - lCmp = 0; + lCmp = _strnicmp( szStrUsed1, szStrUsed2, cchMax ); } } - else if (~ULONG(0) == cchMax ) - { - // Simple path when caller doesn't supply a character count limit. - // We don't have to get the string lengths. - lCmp = strcmp( szStr1, szStr2 ); - } else +#endif { - ULONG cch1 = strlen( szStr1 ); - ULONG cch2 = strlen( szStr2 ); - - ULONG cchToCompare = min( max( cch1, cch2 ), cchMax ); - - if ( cchToCompare < cchMax ) + if ( cchMax == -1 ) { - // Semi-simple path when the provided strings are both shorter than supplied max. - lCmp = strcmp( szStr1, szStr2 ); + lCmp = strcmp( szStrUsed1, szStrUsed2 ); } else { - lCmp = strncmp( szStr1, szStr2, cchToCompare ); + lCmp = strncmp( szStrUsed1, szStrUsed2, cchMax ); } } @@ -165,145 +262,186 @@ LONG LOSStrCompareA( _In_ PCSTR const szStr1, _In_ PCSTR const szStr2, _In_ cons // That means byte for byte equality. If the first string is "less than" the second string, -1 // is returned. If the strings are "equal", 0 is returned. 
If the first string is "greater than" // the second string, +1 is returned. -LONG LOSStrCompareW( _In_ PCWSTR const wszStr1, _In_ PCWSTR const wszStr2, _In_ const ULONG cchMax ) +// +// Note the unusual usage. Most of our string handling uses count of bytes. +// Historically, however, string compare is limited by count of characters. +LONG LOSStrCompareW( + _In_ PCWSTR const wszStr1, + _In_ PCWSTR const wszStr2, + _In_ const ULONG cchMax ) { LONG lCmp; + PCWSTR wszStrUsed1; + PCWSTR wszStrUsed2; + if ( 0 == cchMax ) { // Why are you doing this? return 0; } - if ( ( NULL == wszStr1 ) || ( NULL == wszStr2 ) ) + // We treat NULL pointers as 0 length strings. + if ( NULL == wszStr1 ) { - // wcscmp, wcslen, and wcsncmp don't play well with NULLs. - // NULLs are treated as 0 length strings, and we're sure that - // cchmax is greater than 0, so a non-NULL string is longer. - LONG_PTR lpCmp = (LONG_PTR)wszStr1 - (LONG_PTR)wszStr2; + wszStrUsed1 = L""; + } + else + { + wszStrUsed1 = wszStr1; + } - if ( lpCmp > 0 ) - { - lCmp = 1; - } - else if ( lpCmp < 0 ) + if ( NULL == wszStr2 ) + { + wszStrUsed2 = L""; + } + else + { + wszStrUsed2 = wszStr2; + } + +#if 0 + if ( fIgnoreCase ) + { + if ( cchMax == -1 ) { - lCmp = -1; + lCmp = _wcsicmp( wszStrUsed1, wszStrUsed2 ); } else { - lCmp = 0; + lCmp = _wcsnicmp( wszStrUsed1, wszStrUsed2, cchMax ); } } - else if (~ULONG(0) == cchMax ) - { - // Simple path when caller doesn't supply a character count limit. - // We don't have to get the string lengths. - lCmp = wcscmp( wszStr1, wszStr2 ); - } else +#endif { - ULONG cch1 = wcslen( wszStr1 ); - ULONG cch2 = wcslen( wszStr2 ); - - ULONG cchToCompare = min( max( cch1, cch2 ), cchMax ); - - if ( cchToCompare < cchMax ) + if ( cchMax == -1 ) { - // Semi-simple path when the provided strings are both shorter than supplied max. 
- lCmp = wcscmp( wszStr1, wszStr2 ); + lCmp = wcscmp( wszStrUsed1, wszStrUsed2 ); } else { - lCmp = wcsncmp( wszStr1, wszStr2, cchToCompare ); + lCmp = wcsncmp( wszStrUsed1, wszStrUsed2, cchMax ); } } return lCmp; } +ERR ErrOSStrCbCopyA( + _In_ PSTR szDst, + _In_ SIZE_T cbDst, + _In_ PCSTR szSrc ) +{ + return ErrFromStrsafeHr( StringCbCopyA( szDst, cbDst, szSrc ) ); +} +ERR ErrOSStrCbCopyW( + _In_ PWSTR wszDst, + _In_ SIZE_T cbDst, + _In_ PCWSTR wszSrc ) +{ + return ErrFromStrsafeHr( StringCbCopyW( wszDst, cbDst, wszSrc ) ); +} -// create a formatted string in a given buffer -void __cdecl OSStrCbVFormatA ( __out_bcount(cbBuffer) PSTR szBuffer, size_t cbBuffer, __format_string PCSTR szFormat, va_list alist ) +ERR ErrOSStrCbAppendA( + _In_ PSTR szDst, + _In_ SIZE_T cbDst, + _In_ PCSTR szSrc ) { - HRESULT hr = StringCbVPrintf( szBuffer, cbBuffer, szFormat, alist ); -#ifdef DEBUG - CallS( ErrFromStrsafeHr( hr ) ); -#endif + return ErrFromStrsafeHr( StringCbCatA( szDst, cbDst, szSrc ) ); } -void __cdecl OSStrCbFormatA ( __out_bcount(cbBuffer) PSTR szBuffer, size_t cbBuffer, __format_string PCSTR szFormat, ...) +ERR ErrOSStrCbAppendW( + _In_ PWSTR wszDst, + _In_ SIZE_T cbDst, + _In_ PCWSTR wszSrc ) { - va_list alist; - va_start( alist, szFormat ); - HRESULT hr = StringCbVPrintf( szBuffer, cbBuffer, szFormat, alist ); -#ifdef DEBUG - CallS( ErrFromStrsafeHr( hr ) ); -#endif - va_end( alist ); + return ErrFromStrsafeHr( StringCbCatW( wszDst, cbDst, wszSrc ) ); } // create a formatted string in a given buffer +ERR __cdecl ErrOSStrCbVFormatA ( + _Out_writes_bytes_(cbBuffer) PSTR szBuffer, + SIZE_T cbBuffer, + __format_string PCSTR szFormat, + va_list alist ) +{ + HRESULT hr = StringCbVPrintfA( szBuffer, cbBuffer, szFormat, alist ); + return ErrFromStrsafeHr( hr ); +} -void __cdecl OSStrCbFormatW ( __out_bcount(cbBuffer) PWSTR szBuffer, size_t cbBuffer, __format_string PCWSTR szFormat, ...) 
+// create a formatted string in a given buffer +ERR __cdecl ErrOSStrCbVFormatW ( + _Out_writes_bytes_(cbBuffer) PWSTR szBuffer, + SIZE_T cbBuffer, + __format_string PCWSTR szFormat, + va_list alist ) { - va_list alist; - va_start( alist, szFormat ); HRESULT hr = StringCbVPrintfW( szBuffer, cbBuffer, szFormat, alist ); -#ifdef DEBUG - CallS( ErrFromStrsafeHr( hr ) ); -#endif - va_end( alist ); + return ErrFromStrsafeHr( hr ); } // create a formatted string in a given buffer - -ERR __cdecl ErrOSStrCbFormatA ( __out_bcount(cbBuffer) PSTR szBuffer, size_t cbBuffer, __format_string PCSTR szFormat, ...) +ERR __cdecl ErrOSStrCbFormatA ( + _Out_writes_bytes_(cbBuffer) PSTR szBuffer, + SIZE_T cbBuffer, + __format_string PCSTR szFormat, + ...) { va_list alist; va_start( alist, szFormat ); HRESULT hr = StringCbVPrintf( szBuffer, cbBuffer, szFormat, alist ); va_end( alist ); - return( ErrFromStrsafeHr(hr) ); + return ErrFromStrsafeHr( hr ); } // create a formatted string in a given buffer - -ERR __cdecl ErrOSStrCbFormatW ( __out_bcount(cbBuffer) PWSTR szBuffer, size_t cbBuffer, __format_string PCWSTR szFormat, ...) +ERR __cdecl ErrOSStrCbFormatW ( + _Out_writes_bytes_(cbBuffer) PWSTR szBuffer, + SIZE_T cbBuffer, + __format_string PCWSTR szFormat, + ...) { va_list alist; va_start( alist, szFormat ); HRESULT hr = StringCbVPrintfW( szBuffer, cbBuffer, szFormat, alist ); va_end( alist ); - return( ErrFromStrsafeHr(hr) ); + return ErrFromStrsafeHr( hr ); } // find the first occurrence of the given character in the given string and // return a pointer to that character. NULL is returned when the character // is not found. 
-VOID OSStrCharFindA( _In_ PCSTR const szStr, const char ch, _Outptr_result_maybenull_ PSTR * const pszFound ) +VOID OSStrCharFindA( + _In_ PCSTR const szStr, + const CHAR ch, + _Outptr_result_maybenull_ PSTR * const pszFound ) { - const char* const szFound = szStr; + const CHAR* const szFound = szStr; if ( szFound ) { - *pszFound = (char *)strchr( szStr, ch ); + *pszFound = (CHAR *)strchr( szStr, ch ); } else { *pszFound = NULL; } } -VOID OSStrCharFindW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr_result_maybenull_ PWSTR * const pwszFound ) + +VOID OSStrCharFindW( + _In_ PCWSTR const wszStr, + const WCHAR wch, + _Outptr_result_maybenull_ PWSTR * const pwszFound ) { - const wchar_t *wszFound = wszStr; + const WCHAR *wszFound = wszStr; if ( wszFound ) { while ( L'\0' != *wszFound && wch != *wszFound ) { wszFound++; } - *pwszFound = const_cast< wchar_t *const >( wch == *wszFound ? wszFound : NULL ); + *pwszFound = const_cast< WCHAR *const >( wch == *wszFound ? wszFound : NULL ); } else { @@ -316,20 +454,27 @@ VOID OSStrCharFindW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr_result // return a pointer to that character. NULL is returned when the character // is not found. 
-VOID OSStrCharFindReverseA( _In_ PCSTR const szStr, const char ch, _Outptr_result_maybenull_ PSTR * const pszFound ) +VOID OSStrCharFindReverseA( + _In_ PCSTR const szStr, + const CHAR ch, + _Outptr_result_maybenull_ PSTR * const pszFound ) { Assert( '\0' != ch ); - const char* const szFound = szStr; + const CHAR* const szFound = szStr; if ( szFound ) { - *pszFound = (char *)strrchr( szStr, ch ); + *pszFound = (CHAR *)strrchr( szStr, ch ); } else { *pszFound = NULL; } } -VOID OSStrCharFindReverseW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr_result_maybenull_ PWSTR * const pwszFound ) + +VOID OSStrCharFindReverseW( + _In_ PCWSTR const wszStr, + const WCHAR wch, + _Outptr_result_maybenull_ PWSTR * const pwszFound ) { ULONG ich; ULONG cch; @@ -345,7 +490,7 @@ VOID OSStrCharFindReverseW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr { if ( wch == wszStr[ich] ) { - *pwszFound = const_cast< wchar_t* const >( wszStr + ich ); + *pwszFound = const_cast< WCHAR* const >( wszStr + ich ); return; } } @@ -354,7 +499,8 @@ VOID OSStrCharFindReverseW( _In_ PCWSTR const wszStr, const wchar_t wch, _Outptr // check for a trailing path-delimeter -BOOL FOSSTRTrailingPathDelimiterA( _In_ PCSTR const pszPath ) +BOOL FOSSTRTrailingPathDelimiterA( + _In_ PCSTR const pszPath ) { const DWORD cchPath = ( NULL == pszPath ) ? 0 : strlen( pszPath ); @@ -364,9 +510,11 @@ BOOL FOSSTRTrailingPathDelimiterA( _In_ PCSTR const pszPath ) } return fFalse; } -BOOL FOSSTRTrailingPathDelimiterW( _In_ PCWSTR const pwszPath ) + +BOOL FOSSTRTrailingPathDelimiterW( + _In_ PCWSTR const pwszPath ) { - const DWORD cchPath = ( NULL == pwszPath ) ? 
0 : wcslen( pwszPath ); + const DWORD cchPath = LOSStrLengthW( pwszPath ); if ( cchPath > 0 ) { @@ -375,7 +523,8 @@ BOOL FOSSTRTrailingPathDelimiterW( _In_ PCWSTR const pwszPath ) return fFalse; } -INLINE LOCAL UINT UlCodePageFromOsstrConversion( const OSSTR_CONVERSION osstrConversion ) +INLINE LOCAL UINT UlCodePageFromOsstrConversion( + const OSSTR_CONVERSION osstrConversion ) { switch( osstrConversion ) { @@ -392,12 +541,13 @@ INLINE LOCAL UINT UlCodePageFromOsstrConversion( const OSSTR_CONVERSION osstrCon } // convert a byte string to a wide-char string - -ERR ErrOSSTRAsciiToUnicode( _In_ PCSTR const pszIn, - _Out_opt_z_cap_post_count_(cwchOut, *pcwchRequired) PWSTR const pwszOut, - const size_t cwchOut, // pass in 0 to only return output buffer size in pcwchRequired, JET_errBufferTooSmall will be returned. - size_t * const pcwchRequired, - const OSSTR_CONVERSION osstrConversion ) +ERR ErrOSSTRAsciiToUnicode( + _In_ PCSTR const pszIn, + _Out_opt_z_cap_post_count_(cwchOut, *pcwchRequired) PWSTR const pwszOut, + const SIZE_T cwchOut, // pass in 0 to only return output buffer size in + // pcwchRequired, JET_errBufferTooSmall will be returned. + SIZE_T * const pcwchRequired, + const OSSTR_CONVERSION osstrConversion ) { // Make sure out params are consistent ... @@ -409,12 +559,13 @@ ERR ErrOSSTRAsciiToUnicode( _In_ PCSTR const pszIn, // try the conversion - const size_t cwchActual = MultiByteToWideChar( UlCodePageFromOsstrConversion( osstrConversion ), - MB_ERR_INVALID_CHARS, - pszIn, - -1, - pwszOut, - cwchOut ); + const SIZE_T cwchActual = MultiByteToWideChar( + UlCodePageFromOsstrConversion( osstrConversion ), + MB_ERR_INVALID_CHARS, + pszIn, + -1, + pwszOut, + (INT)cwchOut ); if ( NULL != pcwchRequired ) *pcwchRequired = cwchActual; @@ -458,8 +609,13 @@ ERR ErrOSSTRAsciiToUnicode( _In_ PCSTR const pszIn, // size 3n, if the caller retries w/ a bigger buffer. 
// note we pay 2n (1n more than necessary) just to fail if the // caller passes pcwchRequired and didn't consume pcwchRequired. - *pcwchRequired = MultiByteToWideChar( UlCodePageFromOsstrConversion( osstrConversion ), MB_ERR_INVALID_CHARS, - pszIn, -1, NULL, 0 ); + *pcwchRequired = MultiByteToWideChar( + UlCodePageFromOsstrConversion( osstrConversion ), + MB_ERR_INVALID_CHARS, + pszIn, + -1, + NULL, + 0 ); } return ErrERRCheck( JET_errBufferTooSmall ); } @@ -506,12 +662,15 @@ ERR ErrOSSTRAsciiToUnicode( _In_ PCSTR const pszIn, // convert a wide-char string to a byte string -ERR ErrOSSTRUnicodeToAscii( _In_ PCWSTR const pwszIn, - _Out_opt_z_cap_post_count_(cchOut, *pcchRequired) PSTR const pszOut, - const size_t cchOut, // pass in 0 to only return output buffer size in pcchRequired, JET_errBufferTooSmall will be returned. - size_t * const pcchRequired, - const OSSTR_LOSSY fLossy, // CAUTION: setting this will allow return JET_errSuccess if chars were translated to ? - const OSSTR_CONVERSION osstrConversion ) +ERR ErrOSSTRUnicodeToAscii( + _In_ PCWSTR const pwszIn, + _Out_opt_z_cap_post_count_(cchOut, *pcchRequired) PSTR const pszOut, + const SIZE_T cchOut, // pass in 0 to only return output buffer + // size in pcchRequired, JET_errBufferTooSmall will be returned. + SIZE_T * const pcchRequired, + const OSSTR_LOSSY fLossy, // CAUTION: setting this will allow return JET_errSuccess + // if chars were translated to '?' + const OSSTR_CONVERSION osstrConversion ) { Assert( ( pszOut != NULL && cchOut != 0 ) || @@ -523,14 +682,15 @@ ERR ErrOSSTRUnicodeToAscii( _In_ PCWSTR const pwszIn, // try the conversion BOOL fUsedDefaultChar = fTrue; // presume badly behaved API ... 
- const size_t cchActual = WideCharToMultiByte( UlCodePageFromOsstrConversion( osstrConversion ), - 0, - pwszIn, - -1, - pszOut, - cchOut, - NULL, - &fUsedDefaultChar ); + const SIZE_T cchActual = WideCharToMultiByte( + UlCodePageFromOsstrConversion( osstrConversion ), + 0, + pwszIn, + -1, + pszOut, + (INT)cchOut, + NULL, + &fUsedDefaultChar ); if ( NULL != pcchRequired ) *pcchRequired = cchActual; @@ -584,9 +744,15 @@ ERR ErrOSSTRUnicodeToAscii( _In_ PCWSTR const pwszIn, // caller passes pcchRequired and didn't consume pcchRequired. // #pragma warning(suppress: 38021) - *pcchRequired = WideCharToMultiByte( CP_ACP, 0, - pwszIn, -1, NULL, 0, - NULL, &fUsedDefaultChar ); + *pcchRequired = WideCharToMultiByte( + CP_ACP, + 0, + pwszIn, + -1, + NULL, + 0, + NULL, + &fUsedDefaultChar ); } return ErrERRCheck( JET_errBufferTooSmall ); } @@ -625,53 +791,24 @@ ERR ErrOSSTRUnicodeToAscii( _In_ PCWSTR const pwszIn, } -// convert a WCHAR string to a _TCHAR string - -ERR ErrOSSTRUnicodeToTchar( const wchar_t *const pwszIn, - __out_ecount(ctchOut) _TCHAR *const ptszOut, - const INT ctchOut ) -{ -#ifdef UNICODE - - // check the input buffer against the output buffer - - const wchar_t cwchIn = wcslen( pwszIn ) + 1; - if ( ctchOut < cwchIn ) - { - return ErrERRCheck( JET_errBufferTooSmall ); - } - Assert( ctchOut > 0 ); - - // copy the string - - wcsncpy( ptszOut, pwszIn, ctchOut ); - return JET_errSuccess; - -#else // !UNICODE - - return ErrOSSTRUnicodeToAscii( pwszIn, ptszOut, ctchOut ); - -#endif // UNICODE -} - - // this is to convert a multi string (double zero terminated) // into an existing buffer // if there is no buffer, we will return the needed size // if there is a buffer but not enough space, we will return error and NOT the actual size // -ERR ErrOSSTRAsciiToUnicodeM( _In_ PCSTR const szzMultiIn, - // UNDONE: Exchange prefix continued to complain, to make this right I might need like __success on the return value? 
- // __out_ecount_part_z(cchMax, *pcchActual) PSTR const pszOut, +// UNDONE: Exchange prefix continued to complain, to make this right I might need like __success on the return value? +// __out_ecount_part_z(cchMax, *pcchActual) PSTR const pszOut, +ERR ErrOSSTRAsciiToUnicodeM( + _In_ PCSTR const szzMultiIn, __out_ecount_z(cchMax) WCHAR * wszNew, - ULONG cchMax, - size_t * const pcchActual, - const OSSTR_CONVERSION osstrConversion ) + ULONG cchMax, + SIZE_T * const pcchActual, + const OSSTR_CONVERSION osstrConversion ) { ERR err = JET_errSuccess; - const char * szCurrent = szzMultiIn; - size_t cchActualCurrent = 0; - size_t cchMaxCurrent = cchMax; + const CHAR * szCurrent = szzMultiIn; + SIZE_T cchActualCurrent = 0; + SIZE_T cchMaxCurrent = cchMax; WCHAR * wszNewCurrent = wszNew; @@ -686,9 +823,14 @@ ERR ErrOSSTRAsciiToUnicodeM( _In_ PCSTR const szzMultiIn, while( szCurrent[0] != '\0' ) { - size_t cchCurrent; + SIZE_T cchCurrent; - err = ErrOSSTRAsciiToUnicode( szCurrent, wszNewCurrent, cchMaxCurrent, &cchCurrent, osstrConversion ); + err = ErrOSSTRAsciiToUnicode( + szCurrent, + wszNewCurrent, + cchMaxCurrent, + &cchCurrent, + osstrConversion ); if ( JET_errBufferTooSmall == err ) { @@ -742,25 +884,25 @@ ERR ErrOSSTRAsciiToUnicodeM( _In_ PCSTR const szzMultiIn, return err; } - // this is to convert a multi string (double zero terminated) // into an existing buffer // if there is no buffer, we will return the needed size // if there is a buffer but not enough space, we will return error and NOT the actual size // -ERR ErrOSSTRUnicodeToAsciiM( _In_ PCWSTR const wszzMultiIn, - // UNDONE: Exchange prefix continued to complain, to make this right I might need like __success on the return value? 
- // __out_ecount_part_z(cchMax, *pcchActual) PSTR const pszOut, - __out_ecount_z(cchMax) char * szNew, - ULONG cchMax, - size_t * const pcchActual, - const OSSTR_CONVERSION osstrConversion ) +// UNDONE: Exchange prefix continued to complain, to make this right I might need like __success on the return value? +// __out_ecount_part_z(cchMax, *pcchActual) PSTR const pszOut, +ERR ErrOSSTRUnicodeToAsciiM( + _In_ PCWSTR const wszzMultiIn, + __out_ecount_z(cchMax) CHAR * szNew, + ULONG cchMax, + SIZE_T * const pcchActual, + const OSSTR_CONVERSION osstrConversion ) { ERR err = JET_errSuccess; const WCHAR * wszCurrent = wszzMultiIn; - size_t cchActualCurrent = 0; - size_t cchMaxCurrent = cchMax; - char * szNewCurrent = szNew; + SIZE_T cchActualCurrent = 0; + SIZE_T cchMaxCurrent = cchMax; + CHAR * szNewCurrent = szNew; if ( !wszzMultiIn ) @@ -774,9 +916,15 @@ ERR ErrOSSTRUnicodeToAsciiM( _In_ PCWSTR const wszzMultiIn, while( wszCurrent[0] != L'\0' ) { - size_t cchCurrent; + SIZE_T cchCurrent; - err = ErrOSSTRUnicodeToAscii( wszCurrent, szNewCurrent, cchMaxCurrent * sizeof(char), &cchCurrent, OSSTR_NOT_LOSSY, osstrConversion ); + err = ErrOSSTRUnicodeToAscii( + wszCurrent, + szNewCurrent, + cchMaxCurrent * sizeof(CHAR), + &cchCurrent, + OSSTR_NOT_LOSSY, + osstrConversion ); if ( JET_errBufferTooSmall == err ) { @@ -829,4 +977,3 @@ ERR ErrOSSTRUnicodeToAsciiM( _In_ PCWSTR const wszzMultiIn, return err; } - diff --git a/dev/ese/src/os/thread.cxx b/dev/ese/src/os/thread.cxx index 5e61a0bb..add0404b 100644 --- a/dev/ese/src/os/thread.cxx +++ b/dev/ese/src/os/thread.cxx @@ -5,7 +5,6 @@ #include - // Thread Local Storage // Internal TLS structure @@ -378,7 +377,7 @@ struct _THREAD { PUTIL_THREAD_PROC pfnStart; DWORD_PTR dwParam; - const _TCHAR* szStart; + const CHAR* szStart; HANDLE hThread; DWORD idThread; BOOL fFinish; @@ -466,7 +465,7 @@ const ERR ErrUtilThreadICreate( const EThreadPriority priority, THREAD* const pThread, const DWORD_PTR dwParam, - const _TCHAR* const szStart 
) + const CHAR* const szStart ) { ERR err = JET_errSuccess; diff --git a/dev/ese/src/os/trace.cxx b/dev/ese/src/os/trace.cxx index c8f3d257..b80ac891 100644 --- a/dev/ese/src/os/trace.cxx +++ b/dev/ese/src/os/trace.cxx @@ -710,7 +710,7 @@ const char* OSFormat_( __format_string const char* const szFormat, _In_ va_list szRaw[ cchRawMax ] = 0; cchRaw = 0; - if ( S_OK != StringCbVPrintfA( szRaw + cchRaw, + if ( JET_errSuccess > ErrOSStrCbVFormatA( szRaw + cchRaw, ( cchRawMax - cchRaw ) * sizeof( char ), szFormat, arglist ) ) @@ -795,7 +795,7 @@ const WCHAR* OSFormatW_( __format_string const WCHAR* const wszFormat, _In_ va_l wszRaw[ cchRawMax ] = 0; cchRaw = 0; - if ( S_OK != StringCbVPrintfW( wszRaw + cchRaw, + if ( JET_errSuccess > ErrOSStrCbVFormatW( wszRaw + cchRaw, ( cchRawMax - cchRaw ) * sizeof( WCHAR ), wszFormat, arglist ) ) diff --git a/dev/ese/src/sync/sync.cxx b/dev/ese/src/sync/sync.cxx index f3602395..0cb8400b 100644 --- a/dev/ese/src/sync/sync.cxx +++ b/dev/ese/src/sync/sync.cxx @@ -1,25 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -// tchar.h must be above where strsafe.h is includes. 
-#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -#include -#pragma prefast(pop) - #include "sync.hxx" -#define STRSAFE_NO_DEPRECATE 1 -#pragma prefast(push) -#pragma prefast(disable:28196, "Do not bother us with strsafe, someone else owns that.") -#pragma prefast(disable:28205, "Do not bother us with strsafe, someone else owns that.") -#include -#pragma prefast(pop) - - // Random Fault Injection #ifdef DEBUG @@ -4262,7 +4245,6 @@ void CKernelSemaphore::Release( const INT cToRelease ) #include #include -#include // ================================================================ class CPrintF @@ -4313,7 +4295,7 @@ inline void __cdecl CIPrintF::operator()( const CHAR* szFormat, ... ) CHAR szT[ 1024 ]; va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintfA( szT, sizeof(szT), szFormat, arg_ptr ); + vsprintf_s( szT, sizeof(szT), szFormat, arg_ptr ); va_end( arg_ptr ); CHAR* szLast = szT; @@ -4444,7 +4426,7 @@ void __cdecl CFPrintF::operator()( const char* szFormat, ... 
) va_list arg_ptr; va_start( arg_ptr, szFormat ); - StringCbVPrintfA( szBuf, cchBuf, szFormat, arg_ptr ); + vsprintf_s( szBuf, cchBuf, szFormat, arg_ptr ); va_end( arg_ptr ); // append the string to the file @@ -4890,7 +4872,7 @@ LOCAL VOID SprintHex( { if ( 0 != cbAddress ) { - StringCbPrintfA( sz, cbDest-(sz-szDest), "%*.*lx ", cbAddress, cbAddress, (DWORD)(pb - rgbSrc) + cbStart ); + sprintf_s( sz, cbDest-(sz-szDest), "%*.*lx ", cbAddress, cbAddress, (DWORD)(pb - rgbSrc) + cbStart ); sz += strlen( sz ); } CHAR * szHex = sz; From da7d97ca71fac26b1ccf5e18a68feeb2bed7c7e5 Mon Sep 17 00:00:00 2001 From: TAW Date: Thu, 6 Oct 2022 12:29:20 +0000 Subject: [PATCH 048/102] Fixed missed removal of tchar.h [Substrate:ab1ee63f176508477bdd2c5ce5a0c5d95597b0a9] --- dev/ese/src/_xpress10/xpress10corsica.cxx | 7 ------- dev/ese/src/_xpress10/xpress10sw.cxx | 7 ------- 2 files changed, 14 deletions(-) diff --git a/dev/ese/src/_xpress10/xpress10corsica.cxx b/dev/ese/src/_xpress10/xpress10corsica.cxx index 15115b6b..24a2f816 100644 --- a/dev/ese/src/_xpress10/xpress10corsica.cxx +++ b/dev/ese/src/_xpress10/xpress10corsica.cxx @@ -9,13 +9,6 @@ #include #include #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -//TAW_TODO #include -#pragma prefast(pop) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN diff --git a/dev/ese/src/_xpress10/xpress10sw.cxx b/dev/ese/src/_xpress10/xpress10sw.cxx index ee418f98..b1671d76 100644 --- a/dev/ese/src/_xpress10/xpress10sw.cxx +++ b/dev/ese/src/_xpress10/xpress10sw.cxx @@ -9,13 +9,6 @@ #include #include #include -#pragma prefast(push) -#pragma prefast(disable:26006, "Dont bother us with tchar, someone else owns that.") 
-#pragma prefast(disable:26007, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28718, "Dont bother us with tchar, someone else owns that.") -#pragma prefast(disable:28726, "Dont bother us with tchar, someone else owns that.") -//#include -#pragma prefast(pop) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN From f3ab9591757c4aab6d9d1d41e19c4f913bb2ce57 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Thu, 6 Oct 2022 21:09:11 +0000 Subject: [PATCH 049/102] Block Trim from running during Shrink (+ fix rank violation) [Substrate:82051d30f9f9e10d9e8714c79e395501d973055d] --- dev/ese/src/ese/bf.cxx | 28 ++++++++++++++++++++++------ dev/ese/src/ese/space.cxx | 8 +++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index 0e9a3fa8..961c9ac0 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -23264,23 +23264,34 @@ void BFIPurgeAllPageVersions( _Inout_ BFLatch* const pbfl, const TraceContext& t g_critBFDepend.Enter(); for ( PBF pbfAbandon = pbf->pbfTimeDepChainNext; pbfAbandon != pbfNil; ) { + BOOL fRetry = fFalse; + if ( pbfAbandon->sxwl.ErrTryAcquireExclusiveLatch() == CSXWLatch::ERR::errSuccess ) { if ( !pbfAbandon->fAbandoned ) { - pbfAbandon->sxwl.UpgradeExclusiveLatchToWriteLatch(); - pbfAbandon->fAbandoned = fTrue; - pbfAbandon->sxwl.ReleaseWriteLatch(); + if ( pbfAbandon->sxwl.ErrTryUpgradeExclusiveLatchToWriteLatch() == CSXWLatch::ERR::errSuccess ) + { + pbfAbandon->fAbandoned = fTrue; + pbfAbandon->sxwl.ReleaseWriteLatch(); + } + else + { + fRetry = fTrue; + pbfAbandon->sxwl.ReleaseExclusiveLatch(); + } } else { pbfAbandon->sxwl.ReleaseExclusiveLatch(); } - - // Next in the chain. - pbfAbandon = pbfAbandon->pbfTimeDepChainNext; } else + { + fRetry = fTrue; + } + + if ( fRetry ) { // Avoid deadlocks. 
g_critBFDepend.Leave(); @@ -23290,6 +23301,11 @@ void BFIPurgeAllPageVersions( _Inout_ BFLatch* const pbfl, const TraceContext& t // Reset enumeration because we left the g_critBFDepend for an instant. pbfAbandon = pbf->pbfTimeDepChainNext; } + else + { + // Next in the chain. + pbfAbandon = pbfAbandon->pbfTimeDepChainNext; + } } g_critBFDepend.Leave(); diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index f75206f4..d218b3a4 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -14367,6 +14367,8 @@ LOCAL ERR ErrSPITrimRegion( { ERR err; + Expected( !g_rgfmp[ ifmp ].FShrinkIsActive() ); + *pcpgSparseBeforeThisExtent = 0; *pcpgSparseAfterThisExtent = 0; @@ -14523,15 +14525,15 @@ LOCAL ERR ErrSPIAddFreedExtent( Assert( Pcsr( pfucbAE )->FLatched() ); if ( ( pgnoParentFDP == pgnoSystemRoot ) && + !g_rgfmp[ ifmp ].FShrinkIsActive() && g_rgfmp[ ifmp ].FTrimSupported() && - ( ( GrbitParam( g_rgfmp[ ifmp ].Pinst(), JET_paramEnableShrinkDatabase ) & ( JET_bitShrinkDatabaseOn | JET_bitShrinkDatabaseRealtime ) ) == - ( JET_bitShrinkDatabaseOn | JET_bitShrinkDatabaseRealtime ) ) ) + ( ( GrbitParam( g_rgfmp[ ifmp ].Pinst(), JET_paramEnableShrinkDatabase ) & ( JET_bitShrinkDatabaseOn | JET_bitShrinkDatabaseRealtime ) ) == ( JET_bitShrinkDatabaseOn | JET_bitShrinkDatabaseRealtime ) ) ) { // In-line version of database trim. Swallow the errors because at this point in the // code an error freeing up on-disk space should not block freeing up the extent. 
CPG cpgSparseBeforeThisExtent = 0; CPG cpgSparseAfterThisExtent = 0; - (void) ErrSPITrimRegion( ifmp, pfucbAE->ppib, pgnoLast, cpgSize, &cpgSparseBeforeThisExtent, &cpgSparseAfterThisExtent ); + (void)ErrSPITrimRegion( ifmp, pfucbAE->ppib, pgnoLast, cpgSize, &cpgSparseBeforeThisExtent, &cpgSparseAfterThisExtent ); } HandleError: From be2dcb7fd3121e049cb24c6468dbf278537de282 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 7 Oct 2022 15:29:34 +0000 Subject: [PATCH 050/102] ESE Block Cache: fix cache failure under high load (such as eseutil /r) For a while, I have been seeing stress failures where the cache fails to clean a slab during a normal update because all of the blocks in that slab were subject to active IO Range Locks. I knew that this was likely due to a large IO Range Lock of 3MB vs the 16MB test cache size but I didn't know exactly what was happening. It was rare so it was a low priority to fix. Recently, I was using eseutil /r in prod to reproduce a transient lost flush bug. I found that eseutil /r would very easily hit this same condition. The normal passive/active copy activity has never hit this condition even once. Eseutil /r is old code that was written w/o regard for sharing the hardware so obviously increased aggressiveness was a factor. To attack this problem, I added a bunch of instrumentation for IO Range Locks in the context of overall usage, usage per thread, and usage per slab. The usage per slab is the key metric that can cause problems for the clean process. In this process I discovered that we were not honoring the IO aggregation limits. This was causing potentially huge IOs to be accumulated and these caused single huge IO Range Locks to be acquired. This is the cause of the stress failure. Of course, it isn't enough to just prevent these from being aggregated. We must also determine when "too much" IO is outstanding and pause until we can do more to avoid excessive locking.
We use the per slab IO Range Lock stats to determine when this happens and we will avoid issuing more IO until we are back under the limits. If the IO is sync or a thread hasn't yet issued an async IO then we will always allow one IO to proceed to guarantee forward progress. Note that this is primarily a hazard with the hash function. In prod, the 15GB cache I was testing has 35k slabs each holding 13 database pages. We were unlucky enough to have enough outstanding IO Range Locks to cover all 13 pages on one of them. This is why this change uses the per slab stats to achieve this goal rather than some static limit per thread or cache. Other fixes: - Clean up of TFileFilter's IO combination logic where we were taking the grbitQOS from the wrong source - Reduced some Presence Filter tracing [Substrate:3cdf9fd9140ded493d2d9cdf2eab82bd4a17e5ce] --- dev/ese/src/os/blockcache/_cachebase.hxx | 1 + dev/ese/src/os/blockcache/_filefilter.hxx | 31 +- .../src/os/blockcache/_hashedlrukcache.hxx | 310 ++++++++++++++++-- .../_hashedlrukcachethreadlocalstorage.hxx | 11 +- 4 files changed, 312 insertions(+), 41 deletions(-) diff --git a/dev/ese/src/os/blockcache/_cachebase.hxx b/dev/ese/src/os/blockcache/_cachebase.hxx index 545edec0..3842f6bc 100644 --- a/dev/ese/src/os/blockcache/_cachebase.hxx +++ b/dev/ese/src/os/blockcache/_cachebase.hxx @@ -197,6 +197,7 @@ class TCacheBase // c CFTE* Pcfte() const { return m_pcfte; } const COffsets& Offsets() const { return m_offsets; } const BYTE* const PbData() const { return m_pbData; } + OSFILEQOS GrbitQOS() const { return m_grbitQOS; } ICache::CachingPolicy Cp() const { return m_cp; } BOOL FSync() const { return m_pfnComplete == NULL; } diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 5e781dc5..3bd15eed 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -784,7 +784,7 @@ class TFileFilter // ff Error( JET_errSuccess ); } -
Alloc( prequest = new CRequest( volumeid, fileid, fileserial, fRead, offsets ) ); + Alloc( prequest = new CRequest( volumeid, fileid, fileserial, fRead, offsets, grbitQOS ) ); Call( ErrRequest( pfsconfig, fRead, grbitQOS, fMustCombineIO, &prequest, &fCombined ) ); @@ -844,12 +844,14 @@ class TFileFilter // ff _In_ const FileId fileid, _In_ const FileSerial fileserial, _In_ const BOOL fRead, - _In_ const COffsets& offsets ) + _In_ const COffsets& offsets, + _In_ const OSFILEQOS grbitQOS ) : m_volumeid( volumeid ), m_fileid( fileid ), m_fileserial( fileserial ), m_fRead( fRead ), - m_offsets( offsets ) + m_offsets( offsets ), + m_grbitQOS( grbitQOS ) { m_ilRequestsByIO.InsertAsPrevMost( this ); } @@ -872,6 +874,7 @@ class TFileFilter // ff FileSerial Fileserial() const { return m_fileserial; } BOOL FRead() const { return m_fRead; } const COffsets& Offsets() const { return m_offsets; } + OSFILEQOS GrbitQOS() const { return m_grbitQOS; } COffsets OffsetsForIO() const { @@ -900,6 +903,7 @@ class TFileFilter // ff const FileSerial m_fileserial; const BOOL m_fRead; const COffsets m_offsets; + const OSFILEQOS m_grbitQOS; typename CCountedInvasiveList::CElement m_ileIOs; CCountedInvasiveList m_ilRequestsByIO; typename CCountedInvasiveList::CElement m_ileRequestsByIO; @@ -934,7 +938,7 @@ class TFileFilter // ff prequestIOPrev && !FConflicting( prequestIOPrev, prequestIO ); prequestIOPrev = IlIORequested().Prev( prequestIOPrev ) ) { - if ( FCombinable( pfsconfig, grbitQOS, prequestIOPrev, prequestIO ) ) + if ( FCombinable( pfsconfig, prequestIOPrev, prequestIO ) ) { if ( prequestIOPrev->OffsetsForIO().IbStart() > prequestIO->OffsetsForIO().IbStart() ) { @@ -969,13 +973,13 @@ class TFileFilter // ff // determine if this request could be combined via IO gap coalescing if ( IlIORequested().Prev( prequestIO ) && - FBridgeableGap( pfsconfig, grbitQOS, IlIORequested().Prev( prequestIO ), prequestIO ) ) + FBridgeableGap( pfsconfig, IlIORequested().Prev( prequestIO ), prequestIO ) ) 
{ fCombined = fTrue; } if ( IlIORequested().Next( prequestIO ) && - FBridgeableGap( pfsconfig, grbitQOS, prequestIO, IlIORequested().Next( prequestIO ) ) ) + FBridgeableGap( pfsconfig, prequestIO, IlIORequested().Next( prequestIO ) ) ) { fCombined = fTrue; } @@ -1029,7 +1033,6 @@ class TFileFilter // ff } BOOL FCombinable( _In_ IFileSystemConfiguration* const pfsconfig, - _In_ const OSFILEQOS grbitQOS, _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ) { @@ -1080,7 +1083,7 @@ class TFileFilter // ff if ( offsetsIOA.Cb() + offsetsIOB.Cb() > cbMaxSize ) { - if ( !FOverrideMaxSize( grbitQOS, prequestIOA->FRead() ) ) + if ( !FOverrideMaxSize( prequestIOA ) && !FOverrideMaxSize( prequestIOB ) ) { return fFalse; } @@ -1089,14 +1092,14 @@ class TFileFilter // ff return fTrue; } - BOOL FOverrideMaxSize( _In_ const OSFILEQOS grbitQOS, _In_ const BOOL fRead ) + BOOL FOverrideMaxSize( _In_ CRequest* const prequestIO ) { - if ( fRead ) + if ( prequestIO->FRead() ) { return fFalse; } - if ( !( grbitQOS & qosIOOptimizeOverrideMaxIOLimits ) ) + if ( !( prequestIO->GrbitQOS() & qosIOOptimizeOverrideMaxIOLimits ) ) { return fFalse; } @@ -1150,7 +1153,6 @@ class TFileFilter // ff } BOOL FBridgeableGap( _In_ IFileSystemConfiguration* const pfsconfig, - _In_ const OSFILEQOS grbitQOS, _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ) { @@ -1210,10 +1212,7 @@ class TFileFilter // ff if ( offsetsIOA.Cb() + offsetsIOB.Cb() + cbGap > pfsconfig->CbMaxReadSize() ) { - if ( !FOverrideMaxSize( grbitQOS, prequestIOA->FRead() ) ) - { - return fFalse; - } + return fFalse; } return fTrue; diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 789888fe..3a28b812 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -148,6 +148,7 @@ class THashedLRUKCache CHashedLRUKCachedFileTableEntry* Pcfte() const { return 
THashedLRUKCacheBase::CRequest::Pcfte(); } const COffsets& Offsets() const { return THashedLRUKCacheBase::CRequest::Offsets(); } const BYTE* const PbData() const { return THashedLRUKCacheBase::CRequest::PbData(); } + OSFILEQOS GrbitQOS() const { return THashedLRUKCacheBase::CRequest::GrbitQOS(); } ICache::CachingPolicy Cp() const { return THashedLRUKCacheBase::CRequest::Cp(); } ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } @@ -3779,11 +3780,14 @@ class THashedLRUKCache CallS( ErrToErr( m_pbmLoaded->ErrGet( Islab( ibSlab ), &fSlabLoaded ) ) ); - OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s Presence Filter 0x%016I64x IsSlabLoaded %s", - OSFormatFileId( m_pc ), - ibSlab, - fSlabLoaded ? "fTrue" : "fFalse" ) ); + if ( !fSlabLoaded ) + { + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s Presence Filter 0x%016I64x IsSlabLoaded %s", + OSFormatFileId( m_pc ), + ibSlab, + OSFormatBoolean( fSlabLoaded ) ) ); + } return fSlabLoaded; } @@ -4012,15 +4016,18 @@ class THashedLRUKCache BOOL fPossiblyContains = FPossiblyContains( dwHash ); - OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s Presence Filter %s,0x%08x Contains 0x%02x %s", - OSFormatFileId( m_pc ), - OSFormat( cbid.Volumeid(), - cbid.Fileid(), - cbid.Fileserial() ), - cbid.Cbno(), - WFingerprint( dwHash ), - fPossiblyContains ? 
"fTrue" : "fFalse" ) ); + if ( !fPossiblyContains ) + { + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s Presence Filter %s,0x%08x Contains 0x%02x %s", + OSFormatFileId( m_pc ), + OSFormat( cbid.Volumeid(), + cbid.Fileid(), + cbid.Fileserial() ), + cbid.Cbno(), + WFingerprint( dwHash ), + OSFormatBoolean( fPossiblyContains ) ) ); + } m_rwlPresenceFilter.LeaveAsReader(); @@ -4744,6 +4751,7 @@ class THashedLRUKCache ERR ErrEnqueue( _Inout_ CRequest** const pprequest ); BOOL FConflicting( _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ); BOOL FCombinable( _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ); + BOOL FOverrideMaxSize( _In_ CRequest* const prequestIO ); int CmpRequestIO( _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ); void Issue(); @@ -4752,6 +4760,18 @@ class THashedLRUKCache ERR ErrSynchronousIO( _In_ CRequest* const prequest ); + void WaitForIORangeLock( _In_ CRequest* const prequest ); + BOOL FWaitForIORangeLock( _In_ CRequest* const prequest, + _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ); + void ReleaseIORangeLock( _In_ CRequest* const prequest, + _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ); + void ReleaseIORangeLockedCounts( _In_ CRequest* const prequest, + _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ); + BOOL FAcquireIORangeLockedBySlabCounts( _In_ CRequest* const prequest, _In_ const BOOL fFirstRequest ); + void ReleaseIORangeLockedBySlabCounts( _In_ CHashedLRUKCachedFileTableEntry* const pcfte, + _In_ const COffsets& offsets ); + DWORD CCachedBlockIORangeLockedBySlabMax() const; + void RequestCachedFileIO( _In_ CRequest* const prequestIO ); void RequestCachingFileIO( _In_ CRequest* const prequestIO ); void RequestIO( _In_ CRequest* const prequestIO ); @@ -4928,6 +4948,16 @@ class THashedLRUKCache return ( m_pch->CbCachingFilePerSlab() / ( CCachedBlockChunk::Ccbl() * cbCachedBlock ) ) * sizeof( CCachedBlockChunk 
); } + QWORD IHashedSlab( _In_ const QWORD ibSlab ) const + { + return ( ibSlab - m_pch->IbChunkHash() ) / CbChunkPerSlab(); + } + + QWORD CHashedSlab() const + { + return m_pch->CbChunkHash() / CbChunkPerSlab(); + } + private: static const CCachedBlockId s_cbidInvalid; @@ -4940,6 +4970,11 @@ class THashedLRUKCache ICachedBlockSlabManager* m_pcbsmHash; ICachedBlockSlabManager* m_pcbsmJournal; + volatile DWORD m_cIORangeLockedContext; + volatile DWORD m_cIORangeLocked; + volatile QWORD m_cbIORangeLocked; + volatile DWORD* m_rgcCachedBlockIORangeLockedBySlab; + CSemaphore m_semQuiesceStateAccess; CMeteredSection m_msStateAccess; CManualResetSignal m_msigStateAccess0; @@ -5007,6 +5042,7 @@ THashedLRUKCache::~THashedLRUKCache() ReleaseCompletedSlabWriteBacks(); TermSlabWriteBackHash(); delete[] m_rgcrefJournalSlab; + delete[] m_rgcCachedBlockIORangeLockedBySlab; delete m_pcbsmJournal; delete m_pcbsmHash; delete m_pcbwcm; @@ -6003,6 +6039,10 @@ THashedLRUKCache::THashedLRUKCache( _In_ IFileSystemFilter* const m_pcbwcm( NULL ), m_pcbsmHash( NULL ), m_pcbsmJournal( NULL ), + m_cIORangeLockedContext( 0 ), + m_cIORangeLocked( 0 ), + m_cbIORangeLocked( 0 ), + m_rgcCachedBlockIORangeLockedBySlab( NULL ), m_semQuiesceStateAccess( CSyncBasicInfo( "THashedLRUKCache::m_semQuiesceStateAccess" ) ), m_msigStateAccess0( CSyncBasicInfo( "THashedLRUKCache::m_msigStateAccess0" ) ), m_msigStateAccess1( CSyncBasicInfo( "THashedLRUKCache::m_msigStateAccess1" ) ), @@ -6374,6 +6414,7 @@ template< class I > ERR THashedLRUKCache::ErrInit() { ERR err = JET_errSuccess; + SIZE_T cSlab = 0; ClusterNumber clnoMin = clnoInvalid; ClusterNumber clnoMax = clnoInvalid; @@ -6385,6 +6426,12 @@ ERR THashedLRUKCache::ErrInit() Call( CCachedBlockPresenceFilter::ErrInit( this, &m_pcbpf ) ); + // init the slab counts + + cSlab = CHashedSlab(); + Alloc( (void*)( m_rgcCachedBlockIORangeLockedBySlab = new volatile DWORD[ cSlab ] ) ); + memset( (void*)m_rgcCachedBlockIORangeLockedBySlab, 0, sizeof( 
m_rgcCachedBlockIORangeLockedBySlab[ 0 ] ) * cSlab ); + // mount the journal Call( ErrMountJournal( m_pch, &m_pj ) ); @@ -8081,9 +8128,37 @@ BOOL THashedLRUKCache::FCombinable( _In_ CRequest* const prequestIOA, _In_ CR return fFalse; } + // IOs that are too large cannot be combined + + const QWORD cbMaxSize = prequestIOA->FRead() ? Pfsconfig()->CbMaxReadSize() : Pfsconfig()->CbMaxWriteSize(); + + if ( offsetsIOA.Cb() + offsetsIOB.Cb() > cbMaxSize ) + { + if ( !FOverrideMaxSize( prequestIOA ) && !FOverrideMaxSize( prequestIOB ) ) + { + return fFalse; + } + } + return fTrue; } +template +BOOL THashedLRUKCache::FOverrideMaxSize( _In_ CRequest* const prequestIO ) +{ + if ( prequestIO->FRead() ) + { + return fFalse; + } + + if ( !( prequestIO->GrbitQOS() & qosIOOptimizeOverrideMaxIOLimits ) ) + { + return fFalse; + } + + return fTrue; +} + template int THashedLRUKCache::CmpRequestIO( _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ) { @@ -8166,6 +8241,9 @@ void THashedLRUKCache::Issue() template void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls ) { + BOOL fIORangeLockFailure = fFalse; + CRequest* prequestIONext = NULL; + pctls->BeginAsyncIOWorker(); // for each issued IO, request an IO range lock in terms of the cached file. 
these IO range locks not only @@ -8173,18 +8251,28 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage // offset range including things like write back or moving cached blocks in the caching file pctls->CritAsyncIOWorkerState().Enter(); - while ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost() ) + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); + prequestIO; + prequestIO = prequestIONext ) { - prequestIO->WaitForIORangeLock( CHashedLRUKCacheThreadLocalStorage::CueAsyncIOWorker, (DWORD_PTR)pctls ); + prequestIONext = pctls->IlIOIssued().Next( prequestIO ); - pctls->IlIOIssued().Remove( prequestIO ); - pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); + if ( FWaitForIORangeLock( prequestIO, pctls ) ) + { + pctls->IlIOIssued().Remove( prequestIO ); + pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); + } + else + { + fIORangeLockFailure = fTrue; + } } pctls->CritAsyncIOWorkerState().Leave(); // determine which requested IO range locks have been acquired - CRequest* prequestIONext = NULL; + prequestIONext = NULL; for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) @@ -8287,7 +8375,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage { pctls->IlFinalizeIOCompleted().Remove( prequestIO ); - prequestIO->Piorl()->Release(); + ReleaseIORangeLock( prequestIO, pctls ); CRequest* prequestNext = NULL; for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); @@ -8307,6 +8395,13 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage pctls->RemoveRequest( prequestIO ); } + // if we failed to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again + + if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) + { + pctls->CueAsyncIOWorker(); + } + pctls->EndAsyncIOWorker(); } @@ -8320,7 +8415,7 @@ ERR 
THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // also serialize all activity for that offset range including things like write back or moving cached blocks in // the caching file - prequest->WaitForIORangeLock(); + WaitForIORangeLock( prequest ); // request our IO @@ -8340,10 +8435,179 @@ ERR THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // release the IO range lock - prequest->Piorl()->Release(); + ReleaseIORangeLock( prequest ); return err; } +template +void THashedLRUKCache::WaitForIORangeLock( _In_ CRequest* const prequest ) +{ + const BOOL fSuccess = FWaitForIORangeLock( prequest ); + EnforceSz( fSuccess, "FWaitForIORangeLock" ); +} + +template +BOOL THashedLRUKCache::FWaitForIORangeLock( _In_ CRequest* const prequest, + _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls ) +{ + BOOL fFirstRequest = fFalse; + BOOL fRelease = fTrue; + BOOL fAcquired = fFalse; + + if ( !pctls ) + { + fFirstRequest = fTrue; + } + else + { + fFirstRequest = AtomicIncrement( (DWORD*)&pctls->CIORangeLocked() ) == 1; + AtomicAdd( (QWORD*)&pctls->CbIORangeLocked(), prequest->OffsetsForIO().Cb() ); + } + + AtomicExchangeAdd( (LONG*)&m_cIORangeLockedContext, fFirstRequest ? 1 : 0 ); + AtomicIncrement( (DWORD*)&m_cIORangeLocked ); + AtomicAdd( (QWORD*)&m_cbIORangeLocked, prequest->OffsetsForIO().Cb() ); + + fRelease = fTrue; + + if ( FAcquireIORangeLockedBySlabCounts( prequest, fFirstRequest ) ) + { + fAcquired = fTrue; + fRelease = fFalse; + + prequest->WaitForIORangeLock( pctls ? 
CHashedLRUKCacheThreadLocalStorage::CueAsyncIOWorker_ : NULL, + (DWORD_PTR)pctls ); + } + + if ( fRelease ) + { + ReleaseIORangeLockedCounts( prequest, pctls ); + } + + return fAcquired; +} + +template +void THashedLRUKCache::ReleaseIORangeLock( _In_ CRequest* const prequest, + _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls ) +{ + prequest->Piorl()->Release(); + + ReleaseIORangeLockedBySlabCounts( prequest->Pcfte(), prequest->OffsetsForIO() ); + + ReleaseIORangeLockedCounts( prequest, pctls ); +} + +template +void THashedLRUKCache::ReleaseIORangeLockedCounts( _In_ CRequest* const prequest, + _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls ) +{ + BOOL fLastRequest = fFalse; + const QWORD cbIORangeLocked = prequest->OffsetsForIO().Cb(); + const QWORD cbIORangeLockedNegative = (QWORD)( -( (LONGLONG)cbIORangeLocked ) ); + + if ( !pctls ) + { + fLastRequest = fTrue; + } + else + { + Enforce( pctls->CIORangeLocked() >= 1 ); + fLastRequest = AtomicDecrement( (DWORD*)&pctls->CIORangeLocked() ) == 0; + Enforce( pctls->CbIORangeLocked() >= cbIORangeLocked ); + AtomicAdd( (QWORD*)&pctls->CbIORangeLocked(), cbIORangeLockedNegative ); + } + + Enforce( m_cIORangeLockedContext >= (DWORD)( fLastRequest ? 1 : 0 ) ); + AtomicExchangeAdd( (LONG*)&m_cIORangeLockedContext, fLastRequest ? -1 : 0 ); + Enforce( m_cIORangeLocked >= 1 ); + AtomicDecrement( (DWORD*)&m_cIORangeLocked ); + Enforce( m_cbIORangeLocked >= cbIORangeLocked ); + AtomicAdd( (QWORD*)&m_cbIORangeLocked, cbIORangeLockedNegative ); +} + +template +BOOL THashedLRUKCache::FAcquireIORangeLockedBySlabCounts( _In_ CRequest* const prequest, _In_ const BOOL fFirstRequest ) +{ + const COffsets offsets = prequest->OffsetsForIO(); + CHashedLRUKCachedFileTableEntry* const pcfte = prequest->Pcfte(); + const QWORD cSlab = CHashedSlab(); + const DWORD cCachedBlockMax = fFirstRequest ? 
dwMax : CCachedBlockIORangeLockedBySlabMax(); + + for ( QWORD ibCachedBlock = offsets.IbStart(); + ibCachedBlock <= offsets.IbEnd(); + ibCachedBlock += cbCachedBlock ) + { + const CachedBlockNumber cbno = (CachedBlockNumber)( ibCachedBlock / cbCachedBlock ); + + if ( ibCachedBlock != (QWORD)cbno * cbCachedBlock || cbno == cbnoInvalid ) + { + } + else + { + const CCachedBlockId cbid( pcfte->Volumeid(), pcfte->Fileid(), pcfte->Fileserial(), cbno ); + QWORD ibSlab = 0; + + if ( m_pcbsmHash->ErrGetSlabForCachedBlock( cbid, &ibSlab ) >= JET_errSuccess ) + { + QWORD iSlab = IHashedSlab( ibSlab ); + DWORD cCachedBlockT = 0; + + if ( iSlab < cSlab ) + { + if ( !FAtomicIncrementMax( (DWORD*)&m_rgcCachedBlockIORangeLockedBySlab[ iSlab ], &cCachedBlockT, cCachedBlockMax ) ) + { + ReleaseIORangeLockedBySlabCounts( pcfte, COffsets( prequest->OffsetsForIO().IbStart(), ibCachedBlock - 1 ) ); + return fFalse; + } + } + } + } + } + + return fTrue; +} + +template +void THashedLRUKCache::ReleaseIORangeLockedBySlabCounts( _In_ CHashedLRUKCachedFileTableEntry* const pcfte, + _In_ const COffsets& offsets ) +{ + const QWORD cSlab = CHashedSlab(); + + for ( QWORD ibCachedBlock = offsets.IbStart(); + ibCachedBlock <= offsets.IbEnd(); + ibCachedBlock += cbCachedBlock ) + { + const CachedBlockNumber cbno = (CachedBlockNumber)( ibCachedBlock / cbCachedBlock ); + + if ( ibCachedBlock != (QWORD)cbno * cbCachedBlock || cbno == cbnoInvalid ) + { + } + else + { + const CCachedBlockId cbid( pcfte->Volumeid(), pcfte->Fileid(), pcfte->Fileserial(), cbno ); + QWORD ibSlab = 0; + + if ( m_pcbsmHash->ErrGetSlabForCachedBlock( cbid, &ibSlab ) >= JET_errSuccess ) + { + QWORD iSlab = IHashedSlab( ibSlab ); + + if ( iSlab < cSlab ) + { + Enforce( m_rgcCachedBlockIORangeLockedBySlab[ iSlab ] > 0 ); + AtomicDecrement( (DWORD*)&m_rgcCachedBlockIORangeLockedBySlab[ iSlab ] ); + } + } + } + } +} + +template +DWORD THashedLRUKCache::CCachedBlockIORangeLockedBySlabMax() const +{ + return (DWORD)( 
m_pch->CbCachingFilePerSlab() / cbCachedBlock / 2 ); +} + template void THashedLRUKCache::RequestCachedFileIO( _In_ CRequest* const prequestIO ) { @@ -8370,7 +8634,7 @@ void THashedLRUKCache::WaitForPendingIOAsync( _In_ CHashedLRUKCacheThreadL prequest; prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) { - prequest->WaitForIO( CHashedLRUKCacheThreadLocalStorage::CueAsyncIOWorker, (DWORD_PTR)pctls ); + prequest->WaitForIO( CHashedLRUKCacheThreadLocalStorage::CueAsyncIOWorker_, (DWORD_PTR)pctls ); } } diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx index 74ec45d9..4a2a1b14 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx @@ -23,7 +23,9 @@ class CHashedLRUKCacheThreadLocalStorage // ctls m_critAsyncIOWorkerState( CLockBasicInfo( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_critAsyncIOWorkerState" ), rankIssued, 0 ) ), m_rgibSlab { 0 }, m_rgpcbsSlab { NULL }, - m_ibSlabWait( 0 ) + m_ibSlabWait( 0 ), + m_cIORangeLocked( 0 ), + m_cbIORangeLocked( 0 ) { m_semAsyncIOWorkerRequest.Release(); m_semAsyncIOWorkerRequest.Release(); @@ -52,6 +54,8 @@ class CHashedLRUKCacheThreadLocalStorage // ctls CCountedInvasiveList& IlFinalizeIOPending() { return m_ilFinalizeIOPending; } CCountedInvasiveList& IlFinalizeIOCompleted() { return m_ilFinalizeIOCompleted; } QWORD IbSlabWait() const { return AtomicRead( (__int64*)&m_ibSlabWait ); } + volatile DWORD& CIORangeLocked() { return m_cIORangeLocked; } + volatile QWORD& CbIORangeLocked() { return m_cbIORangeLocked; } void AddRequest( _Inout_ CRequest** const pprequest ) { @@ -221,7 +225,7 @@ class CHashedLRUKCacheThreadLocalStorage // ctls } } - static void CueAsyncIOWorker( _In_ const DWORD_PTR keyIOComplete ) + static void CueAsyncIOWorker_( _In_ const DWORD_PTR keyIOComplete ) { CHashedLRUKCacheThreadLocalStorage* const 
pctls = (CHashedLRUKCacheThreadLocalStorage*)keyIOComplete; @@ -271,4 +275,7 @@ class CHashedLRUKCacheThreadLocalStorage // ctls QWORD m_rgibSlab[ s_cibSlab ]; ICachedBlockSlab* m_rgpcbsSlab[ s_cibSlab ]; volatile QWORD m_ibSlabWait; + + volatile DWORD m_cIORangeLocked; + volatile QWORD m_cbIORangeLocked; }; From d2b0c020a0df6a62b3fd3068f8625dbb79f2ab81 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Mon, 10 Oct 2022 18:35:27 +0000 Subject: [PATCH 051/102] ESE Block Cache: Write Caching Percentage fixes For test purposes, EBC is run locally with "accept forever onetest stress" using the test configuration where all ESE files are cachable. We also configure the cache to be 50/50 read/write to exercise read caching. This differs from the current deployment in prod where we are at 0/100 read/write caching. I never really fully tested this part. While debugging issues where the stress test failed with JET_errInternalError when cleaning slabs during read/write operations, I discovered that this write caching percentage logic was broken. Basically, we would still fail to clean the slab in cases that actually should have worked because of this broken logic trying to protect the caching percentage. For example, we would refuse to cache a read because we thought we didn't have enough write cache but we really had plenty. We care about fixing this because it makes the stress test run longer without failure and it also is preparation for a likely future where both read and write caching are enabled in production. This change makes the following improvements which I think solve almost all of the ErrCleanSlab failure cases: - the logic in CCleanSlabVisitor related to counting what is in the read or write portion of the cache (i.e. clean blocks vs. 
updated blocks, as indicated by FEverDirty) was fixed to not count superseded pages and to count pages produced rather than visited - CCleanSlabVisitor was improved to - properly account for pages that are in the read or write portion of the cache (FValid, !FSuperceded, reads are !FEverDirty and writes are FEverDirty) - properly measure read/write cache blocks produced rather than just visited - identify the read/write cache blocks that are not protected by the write caching percentage for either read or write operations so that they can be written back and/or evicted - failing that, identify cases where a request (read or write) is ok because it won't change the write caching percentage. For example, a read can evict another read and a write can evict another write - failing that, if the caller says to ignore the write caching percentage then evict anyway - clean attempts that fail due to write caching percentage are now traced - we no longer try to get the IO Range Lock for superseded blocks because it isn't necessary - clean attempts that fail due to IO Range Lock failures have better tracing - ErrCleanSlab was improved to ignore (but emit an event for) a failure to cache reads as this isn't fatal - ErrCleanSlab was improved to retry on a failure to cache a write using the new flag to ignore write caching percentage because a failure must cause the write request to fail. 
this would only help for cases where there is read cache that was protected by the write caching percentage and not protected by an IO Range Lock [Substrate:66e142039555482b9e2626cfb3cdc1710b2df4ce] --- .../src/os/blockcache/_hashedlrukcache.hxx | 149 ++++++++++++------ 1 file changed, 101 insertions(+), 48 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 3a28b812..e95e079f 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -1783,6 +1783,7 @@ class THashedLRUKCache _Inout_ ICachedBlockSlab** const ppcbs, _In_ const BOOL fRead, _In_ const QWORD cbRequested, + _In_ const BOOL fOverrideCachePercentage, _Out_ QWORD* const pcbProduced, _Out_ QWORD* const pcbWriteBackFailed ) { @@ -1801,7 +1802,15 @@ class THashedLRUKCache // those as well. { - CCleanSlabVisitor csv( pc, prequest, ppcbs, fRead, cbRequested, isv.CbTotal(), isv.CbWriteCache(), isv.CbReadCache() ); + CCleanSlabVisitor csv( pc, + prequest, + ppcbs, + fRead, + cbRequested, + fOverrideCachePercentage, + isv.CbTotal(), + isv.CbWriteCache(), + isv.CbReadCache() ); Call( csv.ErrTryCleanSlots() ); *pcbProduced = csv.CbProduced(); @@ -1831,6 +1840,7 @@ class THashedLRUKCache _Inout_ ICachedBlockSlab** const ppcbs, _In_ const BOOL fRead, _In_ const QWORD cbRequested, + _In_ const BOOL fOverrideCachePercentage, _In_ const QWORD cbTotal, _In_ const QWORD cbWriteCache, _In_ const QWORD cbReadCache ) @@ -1840,6 +1850,7 @@ class THashedLRUKCache m_pcbs( *ppcbs ), m_fRead( fRead ), m_cbRequested( cbRequested ), + m_fOverrideCachePercentage( fOverrideCachePercentage ), m_cbTotal( cbTotal ), m_pctWrite( max( 0, min( 100, m_pc->Pcconfig()->PctWrite() ) ) ), m_cbWriteCacheMax( (QWORD)( m_cbTotal * m_pctWrite / 100 ) ), @@ -1848,9 +1859,8 @@ class THashedLRUKCache m_cbReadCache( cbReadCache ), m_cbWriteCacheEligible( cbWriteCache > m_cbWriteCacheMax ? 
cbWriteCache - m_cbWriteCacheMax : 0 ), m_cbReadCacheEligible( cbReadCache > m_cbReadCacheMax ? cbReadCache - m_cbReadCacheMax : 0 ), - m_cbSeen( 0 ), - m_cbWriteCacheSeen( 0 ), - m_cbReadCacheSeen( 0 ), + m_cbWriteCacheProduced( 0 ), + m_cbReadCacheProduced( 0 ), m_cbInvalid( 0 ), m_cbIORangeLocked( 0 ), m_cbWriteBackPending( 0 ), @@ -1929,6 +1939,7 @@ class THashedLRUKCache const CCachedBlockId& cbid = slotstCurrent.Cbid(); const QWORD ibCachedBlock = (QWORD)cbid.Cbno() * cbCachedBlock; const COffsets offsets = COffsets( ibCachedBlock, ibCachedBlock - 1 + cbCachedBlock ); + BOOL fProduced = fFalse; BOOL fFileNoLongerExists = fFalse; CHashedLRUKCachedFileTableEntry* pcfte = NULL; BOOL fIORangeLocked = fFalse; @@ -1964,6 +1975,8 @@ class THashedLRUKCache m_cbInvalid += cbCachedBlock; + fProduced = fTrue; + Error( JET_errSuccess ); } @@ -1986,7 +1999,7 @@ class THashedLRUKCache // try to get the IO range lock - Call( ErrTryGetIORangeLock( pcfte, offsets, &fIORangeLocked ) ); + Call( ErrTryGetIORangeLock( slotstCurrent, pcfte, offsets, &fIORangeLocked ) ); // if the cached file still exists and this slot is dirty and not pinned and doesn't contain an // obsolete image of the data then try to write back its data to the cached file. we will mark it @@ -2007,6 +2020,8 @@ class THashedLRUKCache m_cbIORangeLocked -= cbCachedBlock; m_cbWriteBackPending += cbCachedBlock; + fProduced = fTrue; + Error( JET_errSuccess ); } @@ -2022,6 +2037,8 @@ class THashedLRUKCache m_cbIORangeLocked -= fEvicted && fIORangeLocked ? cbCachedBlock : 0; m_cbEvicted += fEvicted ? cbCachedBlock : 0; + fProduced = fEvicted; + // if we evicted this slot then we're done with it if ( fEvicted ) @@ -2037,6 +2054,8 @@ class THashedLRUKCache { m_cbInvalidatePending += cbCachedBlock; + fProduced = fTrue; + Error( JET_errSuccess ); } @@ -2046,56 +2065,62 @@ class THashedLRUKCache HandleError: m_pc->ReleaseCachedFile( &pcfte ); - m_cbSeen += cbCachedBlock; - m_cbReadCacheSeen += slotstCurrent.FEverDirty() ? 
0 : cbCachedBlock; - m_cbWriteCacheSeen += slotstCurrent.FEverDirty() ? cbCachedBlock : 0; + m_cbReadCacheProduced += fProduced && !slotstCurrent.FEverDirty() ? cbCachedBlock : 0; + m_cbWriteCacheProduced += fProduced && slotstCurrent.FEverDirty() ? cbCachedBlock : 0; return err; } BOOL FProtectedFromClean( _In_ const CCachedBlockSlotState& slotstCurrent ) { - // we are cleaning to cache a read + // if this slot contains excess write cache then we can clean it - if ( m_fRead ) + if ( m_cbWriteCacheProduced < m_cbWriteCacheEligible ) { - // if this slot contains excess write cache then we can clean it - - if ( m_cbWriteCacheSeen < m_cbWriteCacheEligible ) - { - return fFalse; - } + return fFalse; + } - // if protecting this slot from clean would cause us to fail to clean enough space then we - // must clean it + // if this slot contains excess read cache then we can clean it - if ( m_cbReadCacheSeen < m_cbReadCache ) - { - return fFalse; - } + if ( m_cbReadCacheProduced < m_cbReadCacheEligible ) + { + return fFalse; } - // we are cleaning to cache a write + // if this is a write and the slot is not part of the read cache then we can clean it without + // increasing the write cache percentage - if ( !m_fRead ) + if ( !m_fRead && !( slotstCurrent.FValid() && !slotstCurrent.FSuperceded() && !slotstCurrent.FEverDirty() ) ) { - // if this slot contains excess read cache then we can clean it + return fFalse; + } - if ( m_cbReadCacheSeen < m_cbReadCacheEligible ) - { - return fFalse; - } + // if this is a read and the slot is not part of the write cache then we can clean it without + // reducing the write cache percentage - // if protecting this slot from clean would cause us to fail to clean enough space then we - // must clean it + if ( m_fRead && !( slotstCurrent.FValid() && !slotstCurrent.FSuperceded() && slotstCurrent.FEverDirty() ) ) + { + return fFalse; + } - if ( m_cbWriteCacheSeen < m_cbWriteCache ) - { - return fFalse; - } + // if we are overriding our efforts to 
preserve our write caching percentage then clean anyway + + if ( m_fOverrideCachePercentage ) + { + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s R=0x%016I64x Clean %s Write Caching Percentage Overridden", + OSFormatFileId( m_pc ), + QWORD( m_prequest ), + OSFormat( slotstCurrent ) ) ); + return fFalse; } // this slot is protected from clean to preserve our write caching percentage + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s R=0x%016I64x Clean ineligible %s Write Caching Percentage", + OSFormatFileId( m_pc ), + QWORD( m_prequest ), + OSFormat( slotstCurrent ) ) ); return fTrue; } @@ -2396,7 +2421,8 @@ class THashedLRUKCache THashedLRUKCache* const m_pc; }; - ERR ErrTryGetIORangeLock( _In_ CHashedLRUKCachedFileTableEntry* const pcfte, + ERR ErrTryGetIORangeLock( _In_ const CCachedBlockSlotState& slotstCurrent, + _In_ CHashedLRUKCachedFileTableEntry* const pcfte, _In_ const COffsets offsetsSlot, _Out_ BOOL* const pfIORangeLocked ) { @@ -2413,6 +2439,13 @@ class THashedLRUKCache Error( JET_errSuccess ); } + // if the block is superceded then we don't need the io range lock + + if ( slotstCurrent.FSuperceded() ) + { + Error( JET_errSuccess ); + } + // determine if we already have an IO range lock that covers the cached file block containing this // cache block @@ -2443,7 +2476,7 @@ class THashedLRUKCache if ( pcfte->FTryRequestIORangeLock( piorlNew, fFalse ) ) { OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s R=0x%016I64x Clean IORangeLock F=%s ib=%llu cb=%llu Grant", + OSFormat( "C=%s R=0x%016I64x Clean F=%s IORangeLock ib=%llu cb=%llu Grant", OSFormatFileId( m_pc ), QWORD( m_prequest ), OSFormatFileId( piorlNew->Pcfte()->Pff()), @@ -2459,9 +2492,10 @@ class THashedLRUKCache else { OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s R=0x%016I64x Clean IORangeLock F=%s ib=%llu cb=%llu not available", + OSFormat( "C=%s R=0x%016I64x Clean Ineligible %s F=%s IORangeLock ib=%llu cb=%llu not available", OSFormatFileId( 
m_pc ), QWORD( m_prequest ), + OSFormat( slotstCurrent ), OSFormatFileId( piorlNew->Pcfte()->Pff()), piorlNew->Offsets().IbStart(), piorlNew->Offsets().Cb() ) ); @@ -2593,8 +2627,11 @@ class THashedLRUKCache _In_ const CCachedBlockSlotState& slotstCurrent ) { m_cbTotal += cbCachedBlock; - m_cbReadCache += slotstCurrent.FValid() && !slotstCurrent.FEverDirty() ? cbCachedBlock : 0; - m_cbWriteCache += slotstCurrent.FValid() && slotstCurrent.FEverDirty() ? cbCachedBlock : 0; + if ( slotstCurrent.FValid() && !slotstCurrent.FSuperceded() ) + { + m_cbReadCache += !slotstCurrent.FEverDirty() ? cbCachedBlock : 0; + m_cbWriteCache += slotstCurrent.FEverDirty() ? cbCachedBlock : 0; + } return fTrue; } @@ -2759,6 +2796,7 @@ class THashedLRUKCache ICachedBlockSlab*& m_pcbs; const BOOL m_fRead; const QWORD m_cbRequested; + const BOOL m_fOverrideCachePercentage; const QWORD m_cbTotal; const double m_pctWrite; const QWORD m_cbWriteCacheMax; @@ -2767,9 +2805,8 @@ class THashedLRUKCache const QWORD m_cbReadCache; const QWORD m_cbWriteCacheEligible; const QWORD m_cbReadCacheEligible; - QWORD m_cbSeen; - QWORD m_cbWriteCacheSeen; - QWORD m_cbReadCacheSeen; + QWORD m_cbWriteCacheProduced; + QWORD m_cbReadCacheProduced; QWORD m_cbInvalid; QWORD m_cbIORangeLocked; QWORD m_cbWriteBackPending; @@ -9279,14 +9316,19 @@ ERR THashedLRUKCache::ErrCleanSlab( _In_ CRequest* const preq // if we don't already have enough then clean until we do or we cannot clean any more - QWORD cClean = 0; - QWORD cbWriteBackFailedPrev = 0; - QWORD cbWriteBackFailed = 0; + QWORD cClean = 0; + QWORD cbWriteBackFailedPrev = 0; + QWORD cbWriteBackFailed = 0; + BOOL fOverrideCachePercentage = fFalse; while ( cbClean > *pcbClean && - ( cClean == 0 || cbWriteBackFailed > cbWriteBackFailedPrev ) && + ( cClean == 0 || + cbWriteBackFailed > cbWriteBackFailedPrev || + !fRead ) && cClean < cCleanMax ) { + fOverrideCachePercentage = cClean > 0 && cbWriteBackFailed == cbWriteBackFailedPrev && !fRead; + cbWriteBackFailedPrev = 
cbWriteBackFailed; Call( CCleanSlabVisitor::ErrExecute( this, @@ -9294,6 +9336,7 @@ ERR THashedLRUKCache::ErrCleanSlab( _In_ CRequest* const preq ppcbs, fRead, cbClean + cbWriteBackFailedPrev, + fOverrideCachePercentage, pcbClean, &cbWriteBackFailed ) ); @@ -9304,7 +9347,17 @@ ERR THashedLRUKCache::ErrCleanSlab( _In_ CRequest* const preq if ( cbClean > *pcbClean ) { - Error( ErrBlockCacheInternalError( "ErrCleanSlab" ) ); + // if we are cleaning for a read then track the failure but the failure will be ignored + + if ( fRead ) + { + BlockCacheNotableEvent( "CleanSlabForRead" ); + Error( ErrERRCheck( JET_errFileIOFail ) ); + } + + // we have experienced a fatal error trying to cache a write + + Error( ErrBlockCacheInternalError( "CleanSlabForWrite" ) ); } HandleError: From 8db8c4a3559c52b16afc62529cbbd6c523f776d1 Mon Sep 17 00:00:00 2001 From: Umair Ahmad Date: Tue, 11 Oct 2022 06:02:00 +0000 Subject: [PATCH 052/102] Fix bad deref in non-revertable deleted table error event while trying to get table name. Caused AV in RevertSnapshotSuite tests. 
[Substrate:4deb65b0162768c7163e987f39038d11ca499d8e] --- dev/ese/src/ese/fucb.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/fucb.cxx b/dev/ese/src/ese/fucb.cxx index 01643d66..5a2ab2cd 100644 --- a/dev/ese/src/ese/fucb.cxx +++ b/dev/ese/src/ese/fucb.cxx @@ -381,7 +381,7 @@ VOID FUCBIllegalOperationFDPToBeDeleted( if ( pfcbTable != NULL && pfcbTable->Ptdb() != NULL && pfcbTable->Ptdb()->SzTableName() != NULL ) { - OSStrCbFormatW( wszTableName, sizeof(wszTableName), L"%hs", pfucb->u.pfcb->Ptdb()->SzTableName() ); + OSStrCbFormatW( wszTableName, sizeof(wszTableName), L"%hs", pfcbTable->Ptdb()->SzTableName() ); } const WCHAR* rgwsz[] = From 023d15af89055fc810a9ffca142e96163797e543 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Tue, 11 Oct 2022 11:11:42 +0000 Subject: [PATCH 053/102] A slew of JetInit and Failure Item diagnostic enhancements Changes: - Add new START_INSTANCE_FAILED_ID / Event ID 109 for JetInit() failures all up. - Add INST tracking to see if Failure Items were thrown on this instance for that event. - Track all failed JetInit() calls in a new .IRS.RAW file as well, so they are not missed. - Promote some cpage errors to corruption failure items. - Tweak some other failure items in logredo.cxx that seem wrong. - Add diagnostics for missed failure items. - Make LastThrow only track negative error values (trying to enhance probability that error at end of a JET API is what caused it to fail). - Also enhanced !ese dump dbfilehdr to read the DB header off the disk, instead of pulling it from memory. - Added a sev3 / email Alert monitor if we failed JetInit with a corruption failure item. - Added optics parsing to break out the JetInit failure to Activating versus Replay, and then for Corruption category - if we triggered an HA failure item or not. 
[Substrate:f14f4dc838f57ac9f4c4827bbe49df4f0b3dc844] --- dev/ese/published/inc/os/error.hxx | 11 +- dev/ese/published/inc/os/hapublish.hxx | 4 + dev/ese/src/_res/jetmsg.mc | 11 ++ dev/ese/src/ese/_log/logredo.cxx | 10 +- dev/ese/src/ese/_osu/hapublishu.cxx | 56 +++++++- dev/ese/src/ese/cpage.cxx | 9 -- dev/ese/src/ese/jetapi.cxx | 173 ++++++++++++++++++++++++- dev/ese/src/ese/jettest.cxx | 10 +- dev/ese/src/ese/sysinit.cxx | 10 ++ dev/ese/src/inc/daedef.hxx | 7 +- dev/ese/src/inc/jettest.hxx | 3 + dev/ese/src/inc/log.hxx | 4 + dev/ese/src/os/edbg.cxx | 99 +++++++++++--- 13 files changed, 359 insertions(+), 48 deletions(-) diff --git a/dev/ese/published/inc/os/error.hxx b/dev/ese/published/inc/os/error.hxx index 08327908..0437a6cf 100644 --- a/dev/ese/published/inc/os/error.hxx +++ b/dev/ese/published/inc/os/error.hxx @@ -523,12 +523,6 @@ public: __forceinline CErrFrameSimple * PefLastThrow(); -__forceinline ERR ErrERRSetLastThrow( _In_ const CHAR* szFile, _In_ const LONG lLine, _In_ const ERR err ) -{ - PefLastThrow()->Set( szFile, lLine, err ); - return err; -} - // Returns the line of the last call that failed out w/ an error, presumably within this frame. 
ULONG UlLineLastCall(); @@ -569,7 +563,10 @@ ERR ErrERRCheck_( const ERR err, const CHAR* szFile, const LONG lLine ); __forceinline ERR ErrERRCheck_( _In_ const ERR err, _In_ const CHAR* szFile, _In_ const LONG lLine ) { extern ERR g_errTrap; - PefLastThrow()->Set( szFile, lLine, err ); + if ( err < 0 /* JET_errSuccess */ ) + { + PefLastThrow()->Set( szFile, lLine, err ); + } if ( g_errTrap == err ) { KernelDebugBreakPoint(); diff --git a/dev/ese/published/inc/os/hapublish.hxx b/dev/ese/published/inc/os/hapublish.hxx index 93929b87..b0a24727 100644 --- a/dev/ese/published/inc/os/hapublish.hxx +++ b/dev/ese/published/inc/os/hapublish.hxx @@ -8,6 +8,10 @@ #include #include "exdbmsg_ese.h" +const DWORD bitHaPublishedEvent = 0x1; +const DWORD bitHaPublishedCorruptionTag = 0x2; +const DWORD bitHaPublishedIoHardTag = 0x4; + #define Ese2HaId( id ) ( HADBFAILURE_EVENT_RANGE_START_ESE + ( id ) ) #define OSUHAPublishEvent( p0, p1, p2, p3, p4, p5, p6, p7, p8, p9 ) \ diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index fc64cb61..69a4ed34 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -215,6 +215,17 @@ Language=English %1 (%2) %3The specific ESE configuration store is locked in a read inhibit state, clear the %1 registry value to enable ESE to continue and utilize the config store. . +MessageId=109 +SymbolicName=START_INSTANCE_FAILED_ID +Language=English +%1 (%2) %3The database engine failed to start instance (%4) due to error %5. (Time=%6 seconds) +%n +Failure Details:%n +Mode: %7%n +Fail Address: %8%n +Publishing: %9%n +. + ;// You are almost assuredly not adding in the right place? 
diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 0f8f0a51..dab4a20a 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -3570,6 +3570,10 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) Assert( (ULONG)data.Cb() == cbNewData ); if ( (ULONG)data.Cb() != cbNewData ) { + // per analysis of a real world case, it is hard to imagine how local (passive) data or remote (active) + // database data generated this incorrectness. This is literally saying the ib/cb pairs do NOT add up + // to the final record size (from the active). This almost assuredly means that there was a corruption + // of the actual log record data. Or a bug in our diff creation or reconstruction alg. OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"a3cb57b9-8ba1-496d-a6fc-4fc2f0140fc4" ); Error( ErrERRCheck( JET_errLogCorrupted ) ); } @@ -5283,7 +5287,7 @@ ERR LOG::ErrLGRICheckRedoAttachDb( goto HandleError; } } - else if ( JET_errReadVerifyFailure == err ) + else if ( FErrIsDbCorruption( err ) ) { reason = eDARHeaderCorrupt; if ( pfmp->FIgnoreDeferredAttach() ) @@ -5293,8 +5297,8 @@ ERR LOG::ErrLGRICheckRedoAttachDb( } else { - // the log file header is corrupt - OSUHAEmitFailureTag( m_pinst, HaDbFailureTagRecoveryRedoLogCorruption, L"9106f5c1-2f93-479b-a12a-c93c6ab3de68" ); + // the DB file header is corrupt + OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"9106f5c1-2f93-479b-a12a-c93c6ab3de68" ); goto HandleError; } } diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index e0d0cdad..57eea8ef 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -20,8 +20,23 @@ void OSUHAPublishEvent_( DWORD cParameter, const WCHAR** rgwszParameter ) { + BOOL fEmit = fTrue; + // failure events need not be published if there is no instance - if ( pinstNil != pinst && UlParam( pinst, JET_paramEnableHaPublish ) ) + // update: why? 
+ if ( pinst == NULL || pinstNil == pinst ) + { + FireWall( "SkipFi2NoInst" ); + fEmit = fFalse; + } + + if ( !UlParam( pinst, JET_paramEnableHaPublish ) ) + { + // might be nice to Assert/FireWall not O365 Datacenter / Store.worker, but a bit of a layer violation + fEmit = fFalse; + } + + if ( fEmit ) { OSUHAPublishEventImpl( haTag, pinst->m_wszInstanceName, @@ -131,10 +146,27 @@ void OSUHAEmitFailureTag_( } } + // FUTURE: HA Publish is only for O365 datacenter, but even so this is a bit of a layering violation. We will + // add these temporarily to do a basic health check on O365 to see if we're dropping HA FailureItems from any ESE + // code paths. + const BOOL fO365StoreWorker = ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Worker" ) == 0 ); + const BOOL fO365DatacenterProcess = + fO365StoreWorker || + ( _wcsicmp( WszUtilProcessName(), L"MSExchangeRepl" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"EdgeTransport" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.DxStore.HA.Instance" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.SharedCache" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Service" ) == 0 ); // calls JET APIs, but should not actually start ese inst + // should we add eseutil? + // if the instance pointer is NULL then do not emit an event // if ( !pinstActual ) { + if ( !FInEmbeddedUnitTest() ) + { + FireWall( "SkipFiNoInstActualX" ); + } fEmit = fFalse; } @@ -142,6 +174,10 @@ void OSUHAEmitFailureTag_( // if ( pinstActual && !UlParam( pinstActual, JET_paramEnableHaPublish ) ) { + if ( fO365StoreWorker ) + { + FireWall( "SkipFiHaPublishOff" ); + } fEmit = fFalse; } @@ -151,6 +187,11 @@ void OSUHAEmitFailureTag_( ( !pinstActual->m_wszInstanceName || !pinstActual->m_wszInstanceName[ 0 ] || !pinstActual->m_wszDisplayName || !pinstActual->m_wszDisplayName[ 0 ] ) ) { + // many test processes have this off, but all real ESE instances should be correctly identified. 
+ if ( fO365DatacenterProcess ) + { + FireWall( "SkipFiNoInstOrDispName" ); + } fEmit = fFalse; } @@ -158,6 +199,7 @@ void OSUHAEmitFailureTag_( // if ( haTag == HaDbFailureTagNoOp ) { + FireWall( "SkipFiTagNoOp" ); fEmit = fFalse; } @@ -165,6 +207,7 @@ void OSUHAEmitFailureTag_( // if ( !wszGuid || !wszGuid[ 0 ] ) { + FireWall( "SkipFiNoGuid" ); fEmit = fFalse; } @@ -176,6 +219,7 @@ void OSUHAEmitFailureTag_( HA_NOOP_FAILURE_TAG_ID + msgidOffset <= HA_NOOP_FAILURE_TAG_ID || HA_NOOP_FAILURE_TAG_ID + msgidOffset > HA_MAX_FAILURE_TAG_ID ) { + FireWall( "SkipFiEvtOutOfRange" ); fEmit = fFalse; } @@ -214,6 +258,16 @@ void OSUHAEmitFailureTag_( HA_NOOP_FAILURE_TAG_ID + msgidOffset, iwsz, rgwsz ); + + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedEvent ); + if ( haTag == HaDbFailureTagCorruption ) + { + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedCorruptionTag ); + } + if ( haTag == HaDbFailureTagIoHard ) + { + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedIoHardTag ); + } } // cleanup diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index d2e39137..7e1476d3 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -4912,20 +4912,14 @@ ERR CPAGE::ErrCheckPage( // The whole line is starting above the data start, i.e. off the data section, and possibly even off page. MakeCorruptionDetailsSz( L"TAG %d computed offset starts too high (ib=%d, cb=%d, %p > %p)", itag, ib, cb, line.pv, pbPageDataEnd ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); - PageAssertTrack( *this, fFalse, "LineEntirelyAboveDataSection" ); -#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, L"LineEntirelyAboveDataSection", wszCorruptionDetails, fLogEvent ) ); -#endif } if ( pbLineLastByte > pbPageDataEnd ) { // The line ends above the data start, i.e. off the data section, but does start / overlaping in valid data section. 
MakeCorruptionDetailsSz( L"TAG %d computed offset starts too high (ib=%d, cb=%d, %p > %p)", itag, ib, cb, pbLineLastByte, pbPageDataEnd ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); - PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "LineEndsAboveDataSection" ); -#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, L"LineEndsAboveDataSection", wszCorruptionDetails, fLogEvent ) ); -#endif } if ( errGetLine < JET_errSuccess || !FOnData( line.pv, line.cb ) ) @@ -4937,10 +4931,7 @@ ERR CPAGE::ErrCheckPage( MakeCorruptionDetailsSz( L"UNCAUGHT: TAG %d ErrGetPtr() failed or got line off page (ib=%d, cb=%d, err=%d,f=%d).", itag, ib, cb, errGetLine, FOnData( line.pv, line.cb ) ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); // there should not be too many errors coming from ErrGetLine() that we can't embed the err in the corruption type. - PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "GetLineFailed:%d\n", errGetLine ); -#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, wszGetLineErr, wszCorruptionDetails, fLogEvent ) ); -#endif } // do some simple KEYDATAFLAGS checks diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 425c01a2..e49369ff 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -1549,6 +1549,92 @@ VOID PERFSetDatabaseNames( IFileSystemAPI* const pfsapi ) } +#ifdef ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS + +// +// Trace to an IRS.RAW the init failure. 
+// + +void DumpFailedInitToIrsRaw( + _In_ INST * pinst, + _In_ PCWSTR wszInstDisplayName, + _In_ PCWSTR wszErrorState, + _In_ PCWSTR wszSeconds, + _In_ PCWSTR wszFailingMode, + _In_ PCWSTR wszFailingAddress, + _In_ PCWSTR wszHaPublishingFacts ) +{ + __int64 fileTime; + WCHAR wszDate[32]; + WCHAR wszTime[32]; + size_t cchRequired; + WCHAR wszInstIrsFile[ 5 /* Inst- */ + 3 /* inst log base name */ + 1 ]; + WCHAR wszInstIrsPathBase[ OSFSAPI_MAX_PATH ]; + CPRINTF * pcprintfPageTrace = NULL; + + if ( pinst == NULL || pinst->m_pfsapi == NULL ) + { + FireWall( "InstIrsUnexpectedInitExitBeforeInstOrPfsapiAlloc" ); + return; + } + + if ( ( SzParam( pinst, JET_paramLogFilePath ) == NULL ) || + ( SzParam( pinst, JET_paramLogFilePath )[0] == L'\0' ) ) + { + FireWall( "InstIrsLogPathNotSet" ); + return; + } + + if ( ( SzParam( pinst, JET_paramBaseName ) == NULL ) || + ( SzParam( pinst, JET_paramBaseName )[0] == L'\0' ) ) + { + FireWall( "InstIrsBaseNameNotSet" ); + return; + } + + // make path + // + OSStrCbFormatW( wszInstIrsFile, sizeof( wszInstIrsFile ), L"Inst-%ws", SzParam( pinst, JET_paramBaseName ) ); + ERR errT = pinst->m_pfsapi->ErrPathBuild( + SzParam( pinst, JET_paramLogFilePath ), + wszInstIrsFile, + L"", // ext filled by IRS func / ErrBeginDatabaseIncReseedTracing() + wszInstIrsPathBase, + sizeof( wszInstIrsPathBase ) ); + if ( errT < JET_errSuccess ) + { + FireWall( "InstIrsPathBuildFail" ); + return; + } + + // start tracing (before anything else) + // + errT = ErrBeginDatabaseIncReseedTracing( pinst->m_pfsapi, wszInstIrsPathBase, &pcprintfPageTrace ); + if ( errT < JET_errSuccess ) + { + FireWall( "InstIrsFailedIrsOpen" ); + return; + } + + fileTime = UtilGetCurrentFileTime(); + ErrUtilFormatFileTimeAsTimeWithSeconds( fileTime, wszTime, _countof(wszTime), &cchRequired); + ErrUtilFormatFileTimeAsDate( fileTime, wszDate, _countof(wszDate), &cchRequired); + (*pcprintfPageTrace)( "Begin " __FUNCTION__ "() @ Time %ws %ws\r\n", wszTime, wszDate ); + + // Consider 
adding ERRFormatIssueSource() to get last error information and Server Version. + (*pcprintfPageTrace)( "JetInit (%ws) Failed with %ws in %ws seconds.\r\n", wszInstDisplayName, wszErrorState, wszSeconds ); + (*pcprintfPageTrace)( "Failing Mode: %ws\r\n", wszFailingMode ); + (*pcprintfPageTrace)( "Failing Address: %ws\r\n", wszFailingAddress ); + (*pcprintfPageTrace)( "HA Pub Facts: %ws\r\n", wszHaPublishingFacts ); + + EndDatabaseIncReseedTracing( &pcprintfPageTrace ); + + return; +} + +#endif // ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS + + // // CIsamSequenceDiagLog // @@ -1778,6 +1864,9 @@ __int64 CIsamSequenceDiagLog::UsecTimer( _In_ INT seqBegin, _In_ const INT seqEn return 0; } + Expected( FTriggeredSequence_( 0 ) ); // be odd to have not started sequence and ask for timings + Expected( seqEnd + 1 != m_cseqMax || FTriggeredSequence_( seqEnd ) ); + if ( !FValidSequence_( seqBegin ) || !FValidSequence_( seqEnd ) || seqBegin >= seqEnd || @@ -1793,9 +1882,10 @@ __int64 CIsamSequenceDiagLog::UsecTimer( _In_ INT seqBegin, _In_ const INT seqEn { seqBegin--; } - + Expected( seqBegin < seqEnd ); // this should be true unless we had a failure before the 2nd sequence (seq = 1). Let us see if it happens. + if ( !FTriggeredSequence_( seqBegin ) || - !FTriggeredSequence_( seqEnd ) ) + !FTriggeredSequence_( seqEnd ) ) { return 0; } @@ -3138,7 +3228,7 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration // initialize this setting if ( m_permillageSmoothIo == dwMax ) { - // Exs: 999‰ = 99.9% Smooth, 990‰ = 99.0% Smooth, 900‰ = 90.0% Smooth. Debug default = 0.2% + // Exs: 999� = 99.9% Smooth, 990� = 99.0% Smooth, 900� = 90.0% Smooth. 
Debug default = 0.2% ULONG permillageSmoothIo = OnDebugOrRetail( 2, CDefaultFileSystemConfiguration::PermillageSmoothIo() ); if ( m_pinst ) @@ -21480,10 +21570,10 @@ LOCAL JET_ERR JetInitEx( const ULONG cbTimingResourceDataSequence = pinst->m_isdlInit.CbSprintTimings(); WCHAR * wszTimingResourceDataSequence = (WCHAR *)_alloca( cbTimingResourceDataSequence ); pinst->m_isdlInit.SprintTimings( wszTimingResourceDataSequence, cbTimingResourceDataSequence ); - const __int64 secsInit = pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000; // convert to seconds - WCHAR wszSeconds[16]; + const double secsInit = (double)pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000.0; // convert to seconds + WCHAR wszSeconds[30]; WCHAR wszInstId[16]; - OSStrCbFormatW( wszSeconds, sizeof(wszSeconds), L"%I64d", secsInit ); + OSStrCbFormatW( wszSeconds, sizeof(wszSeconds), L"%.3f", secsInit ); OSStrCbFormatW( wszInstId, sizeof(wszInstId), L"%d", IpinstFromPinst( pinst ) ); const WCHAR * rgszT[4] = { wszInstId, wszSeconds, wszTimingResourceDataSequence, wszAdditionalFixedData }; @@ -21520,6 +21610,77 @@ LOCAL JET_ERR JetInitEx( { const WCHAR* wszInstDisplayName = ( pinst != NULL && pinst->m_wszDisplayName != NULL ? 
pinst->m_wszDisplayName : L"_unknown_" ); OSDiagTrackInit( wszInstDisplayName, pinst->m_plog->QwSignLogHash(), err ); + + // avoiding quick and dirty non-localized insert text on windows +#ifdef ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS + + pinst->m_isdlInit.Trigger( eInitDone ); + const double secsInit2 = (double)pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000.0; // convert to seconds + WCHAR wszSeconds2[30]; + OSStrCbFormatW( wszSeconds2, sizeof(wszSeconds2), L"%.3f", secsInit2 ); + + WCHAR wszErrorState[120]; + JET_ERRCAT errcatMostSpecific = JET_errcatUnknown; + (void)ErrERRLookupErrorCategory( err, &errcatMostSpecific ); + if ( PefLastThrow() && err == PefLastThrow()->Err() ) + { + PERSISTED // for optics "(JET_errcat: 10)", etc. see Exch \ EseEventCategorized.cs. + OSStrCbFormatW( wszErrorState, sizeof(wszErrorState), L"%d (JET_errcat: %d) (src: %hs:%d)", err, errcatMostSpecific, SzSourceFileName( PefLastThrow()->SzFile() ), PefLastThrow()->UlLine() ); + } + else + { + PERSISTED // for optics "(JET_errcat: 10)", etc. see Exch \ EseEventCategorized.cs. + OSStrCbFormatW( wszErrorState, sizeof(wszErrorState), L"%d (JET_errcat: %d)", err, errcatMostSpecific ); + } + + WCHAR wszFailingMode[2] = { WchReportInstState( pinst ), L'\0' }; + + WCHAR wszFailingAddress[60]; + // The normal way of detecting recovery \ redo via: + // plog->FRecovering() && plog->FRecoveringMode() == fRecoveringRedo + // is controlled and cleaned up by this point even on an error. However, fortunately + // the pinst->m_perfstatusEvent mode is one way during init, and not reset until next + // call to JetInit() so we use this method for determining what mode we reached. 
+ const BOOL fRedo = pinst->m_perfstatusEvent == perfStatusRecoveryRedo; + const BOOL fUndo = pinst->m_perfstatusEvent == perfStatusRecoveryUndo; + const BOOL fDo = pinst->m_perfstatusEvent == perfStatusRuntime; + // Normal method of getting lpgosRedo (plog->LgposLGLogTipNoLock()) won't work for + // the same reason the regular mode computation, computes it wrong above. But the + // actual lgpos we want is in m_lgposRedo, so use special function to fetch it. + LGPOS lgposFailed = !fUndo ? // just in case, we treat everything besides undo as redo. + pinst->m_plog->LgposDiagnosticRedoFailedAddress() : + pinst->m_plog->LgposLGLogTipNoLock(); // undo address comes from live lgpos tip. + // Can imagine actually sticking other pieces of address in here, like the pgno the LR was + // referencing, or even logical descriptions like "DbfilehdrReadErr" or something. + OSStrCbFormatW( wszFailingAddress, sizeof( wszFailingAddress ), + L"lgpos%hs:%08x:%04x:%04x", + fRedo ? "Redo" : + ( fUndo ? "Undo" : + ( fDo ? "RedoOld" : + "Redo-Unconfirmed" ) ), + lgposFailed.lGeneration, lgposFailed.isec, lgposFailed.ib ); + + WCHAR wszHaPublishingFacts[300]; + PERSISTED // for optics "Verbose: 1", "FI Tags Published: 0x", and "FiCorruptionTag ". see Exch \ EseEventCategorized.cs, Exch \ EseDatabaseMonitoringContext.cs + (void)ErrOSStrCbFormatW( wszHaPublishingFacts, sizeof( wszHaPublishingFacts ), L"Verbose: %d, FI Tags Published: 0x%x ( %hs%hs)", + !!pinst->m_isdlInit.FTriggeredStep( eInitLogRecoverySilentRedoDone ), + pinst->m_grbitHaFailureTags, +#if defined( USE_HAPUBLISH_API ) + ( pinst->m_grbitHaFailureTags & bitHaPublishedCorruptionTag ) ? "FiCorruptionTag " : "", + ( pinst->m_grbitHaFailureTags & bitHaPublishedIoHardTag ) ? "FiIoHardTag " : "" ); + ( pinst->m_grbitHaFailureTags & bitHaPublishedIoHardTag ) ? 
"FiIoHardTag " : "" +#else + "", "" +#endif + ); + + const WCHAR * rgszFailT[6] = { wszInstDisplayName, wszErrorState, wszSeconds2, wszFailingMode, wszFailingAddress, wszHaPublishingFacts }; + + UtilReportEvent( eventError, GENERAL_CATEGORY, START_INSTANCE_FAILED_ID, _countof( rgszFailT ), rgszFailT, 0, NULL, pinst ); + + // Also to avoid event wrap, report failures in JetInit() to .IRS.RAW + DumpFailedInitToIrsRaw( pinst, wszInstDisplayName, wszErrorState, wszSeconds2, wszFailingMode, wszFailingAddress, wszHaPublishingFacts ); +#endif } // if instance allocated in this function call diff --git a/dev/ese/src/ese/jettest.cxx b/dev/ese/src/ese/jettest.cxx index 90948cfc..4868a537 100644 --- a/dev/ese/src/ese/jettest.cxx +++ b/dev/ese/src/ese/jettest.cxx @@ -5,7 +5,9 @@ #include "PageSizeClean.hxx" -#ifdef ENABLE_JET_UNIT_TEST +#ifndef ENABLE_JET_UNIT_TEST +#error "File jettest.cxx is only supposed to be referenced / compiled in the unit test .vcxproj file." +#endif #include @@ -537,5 +539,7 @@ void JetTestEnforceSEHException::Cleanup() s_pThreadExcep = NULL; } -#endif // ENABLE_JET_UNIT_TEST - +BOOL FInEmbeddedUnitTest() +{ + return fTrue; +} diff --git a/dev/ese/src/ese/sysinit.cxx b/dev/ese/src/ese/sysinit.cxx index d14c0424..94272cd9 100644 --- a/dev/ese/src/ese/sysinit.cxx +++ b/dev/ese/src/ese/sysinit.cxx @@ -11,6 +11,16 @@ BOOL g_fDBGPerfOutput = fFalse; #endif /* DEBUG || PERFDUMP */ +// This is here, because jettest.cxx is only compiled in eselibwithtest.dll + +#ifndef ENABLE_JET_UNIT_TEST + +BOOL FInEmbeddedUnitTest() +{ + return fFalse; +} + +#endif // ENABLE_JET_UNIT_TEST #ifdef DEBUG diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 8c44620d..6cf79464 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -3465,7 +3465,7 @@ INLINE ERR DBFILEHDR::DumpLite( CPRINTF* pcprintf, const char * const szNewLine, (*pcprintf)( "Revert Page Count: %u%s", (ULONG) le_ulRevertPageCount, szNewLine ); lgpos = 
le_lgposCommitBeforeRevert; - (*pcprintf)( "Last Commit Before Revert: (0x%X,%X,%X) ", lgpos.lGeneration, lgpos.isec, lgpos.ib ); + (*pcprintf)( "Last Commit Before Revert: (0x%X,%X,%X) %s", lgpos.lGeneration, lgpos.isec, lgpos.ib, szNewLine ); return JET_errSuccess; } @@ -5032,6 +5032,9 @@ public: BOOL m_fTermInProgress; BOOL m_fTermAbruptly; INST_STINIT m_fSTInit; + // Note: This status is not cleaned up if we fail in middle of Redo, Undo and this fact + // is used at end of JetInitEx() to log what mode we failed in. So do not reset this on + // error paths INT m_perfstatusEvent; // Redo, Undo, Runtime/Do-time, and Term. BOOL m_fBackupAllowed; @@ -5233,6 +5236,8 @@ public: CIsamSequenceDiagLog m_isdlInit; CIsamSequenceDiagLog m_isdlTerm; + volatile DWORD m_grbitHaFailureTags; + private: ERR ErrAPIAbandonEnter_( const LONG lOld ); diff --git a/dev/ese/src/inc/jettest.hxx b/dev/ese/src/inc/jettest.hxx index eebb9b53..92c623dc 100644 --- a/dev/ese/src/inc/jettest.hxx +++ b/dev/ese/src/inc/jettest.hxx @@ -356,5 +356,8 @@ void Test##component##test::Run_() #endif // !ENABLE_JET_UNIT_TEST +// defined in both ese.dll and eselibwithtest.dll, but returning different answer +BOOL FInEmbeddedUnitTest(); + #endif // JETTEST_HXX_INCLUDED diff --git a/dev/ese/src/inc/log.hxx b/dev/ese/src/inc/log.hxx index 27c685a0..8eaf7b02 100644 --- a/dev/ese/src/inc/log.hxx +++ b/dev/ese/src/inc/log.hxx @@ -1285,6 +1285,10 @@ public: BOOL FLastLRIsShutdown() const { return m_fLastLRIsShutdown; } LGPOS LgposShutDownMark() const { return m_lgposRedoShutDownMarkGlobal; } + // Note: Generally people should not be interested in lgposRedo, but failure event code has a need to + // know it directly. 
+ LGPOS LgposDiagnosticRedoFailedAddress() const { return m_lgposRedo; } + VOID LGRRemoveFucb( FUCB * pfucb ); ERR ErrLGMostSignificantRecoveryWarning( void ); diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index c81f26bc..73058a7a 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -665,7 +665,7 @@ const INT cfuncmap = sizeof( rgfuncmap ) / sizeof( EDBGFUNCMAP ); #define DUMPA( _struct ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" } -#define DUMPAA( _struct, addlargs ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" addlargs } +#define DUMPAA( _struct, addlargs ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" addlargs } // ================================================================ @@ -691,20 +691,20 @@ LOCAL const CDUMPMAP rgcdumpmap[] = { DUMPA( LOG_STREAM ), DUMPA( LOG_WRITE_BUFFER ), DUMPA( VER ), - DUMPAA( MEMPOOL, "[|*] - =specified tag only, *=all tags" ), + DUMPAA( MEMPOOL, " [|*] - =specified tag only, *=all tags" ), DUMPA( SPLIT ), DUMPA( SPLITPATH ), DUMPA( MERGE ), DUMPA( MERGEPATH ), - DUMPA( DBFILEHDR ), + DUMPAA( DBFILEHDR, "|.|.disk" ), { "CDynamicHashTable", &(CDUMPA::instance), "CDynamicHashTable
" }, { "CApproximateIndex", &(CDUMPA::instance), "CApproximateIndex
" }, { "g_bflruk", &(CDUMPA::instance), "g_bflruk ese!g_bflruk" }, - DUMPA( COSDisk ), - DUMPA( COSFile ), + DUMPAA( COSDisk, "|.db|.edb" ), + DUMPAA( COSFile, "|.db|.edb" ), DUMPA( COSFileFind ), DUMPA( COSFileSystem ), - DUMPAA( IOREQ, "[dumpall|norunstats]" ), + DUMPAA( IOREQ, " [dumpall|norunstats]" ), { "PAGE", &(CDUMPA::instance), "PAGE [a|b|h|t|*|2|4|8|16|32] - a=alloc map, b=binary dump, h=header, t=tags, *=all, 2/4/8/16/32=pagesize" }, DUMPA( CResource ), @@ -16117,7 +16117,7 @@ DEBUG_EXT( EDBGDumpDBDiskPage ) dprintf( "Error: Could not read global FMP variables for ifmp = %d.\n", ifmp ); goto HandleError; } - else if ( pgno < 1 ) // UNDONE: don't currently support dumping page header + else if ( pgno < 1 ) { dprintf( "Error: Invalid pgno.\n" ); goto HandleError; @@ -16180,6 +16180,10 @@ DEBUG_EXT( EDBGDumpDBDiskPage ) { VirtualFree( pbPage, 0, MEM_RELEASE ); } + if ( NULL != posf ) + { + Unfetch( posf ); + } } @@ -19127,6 +19131,8 @@ VOID CDUMPA::Dump( { DBFILEHDR * pdbfilehdrDebuggee = NULL; DBFILEHDR * pdbfilehdr = NULL; + COSFile * posf = NULL; + const BOOL fReadFromDisk = ( argc >= 1 || 0 == _stricmp( argv[ 0 ], ".disk" ) ); const CHAR * const szMemDump = "mem"; @@ -19144,24 +19150,81 @@ VOID CDUMPA::Dump( return; } - if ( FFetchVariable( pdbfilehdrDebuggee, &pdbfilehdr ) ) + if ( fReadFromDisk ) { - const SIZE_T dwOffset = (BYTE *)pdbfilehdrDebuggee - (BYTE *)pdbfilehdr; + HANDLE hCurrentProcess; + ULONG64 ulCurrentProcess; + + const ULONG cbPage = Pdls()->CbPage(); - dprintf( "[DBFILEHDR] 0x%p bytes @ 0x%N\n", - QWORD( sizeof( DBFILEHDR ) ), - pdbfilehdrDebuggee ); - if ( fMemDump ) + if ( Pdls()->IfmpCurrent() == ifmpNil || Pdls()->IfmpCurrent() == 0 || + Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) == NULL || + cbPage == 0 ) { - (VOID)( pdbfilehdr->Dump( CPRINTFWDBG::PcprintfInstance(), dwOffset ) ); + dprintf( "Something went wrong. To use .disk argument, must have an implicit IFMP set with !ese .db. Or we couldn't load the Pfmp cache or cbPage. 
(%d, 0x%p, %d)\n", + Pdls()->IfmpCurrent(), ( Pdls()->IfmpCurrent() != 0 && Pdls()->IfmpCurrent() != ifmpNil ) ? Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) : NULL, cbPage ); + goto HandleError; } - else + + // UNDONE: currently assumes all databases are COSFile + // + if ( !FFetchVariable( (COSFile *)( Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) )->Pfapi(), &posf ) ) { - (VOID)( pdbfilehdr->DumpLite( CPRINTFWDBG::PcprintfInstance(), "\n", dwOffset ) ); + dprintf( "Error: Could not read COSFile at 0x%N for specified FMP.\n", ( Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) )->Pfapi() ); + goto HandleError; } - - Unfetch( pdbfilehdr ); + + // VirtualAlloc() the buffer to ensure alignment + // + pdbfilehdr = (DBFILEHDR *)VirtualAlloc( NULL, cbPage, MEM_COMMIT, PAGE_READWRITE ); + if ( NULL == pdbfilehdr ) + { + dprintf( "Error: Could not allocate DBFILEHDR buffer (%d bytes) via VA !\n", cbPage ); + goto HandleError; + return; + } + + HRESULT hr = g_DebugSystemObjects->GetCurrentProcessHandle( &ulCurrentProcess ); + hCurrentProcess = (HANDLE) ulCurrentProcess; + if ( FAILED( hr ) ) + { + dprintf( "Failed to fetch process handle: %#x\n", hr ); + goto HandleError; + } + + if ( !FEDBGGetDbDiskPage( hCurrentProcess, posf->Handle(), (PGNO)-1 /* 0 would be shadow header */, (BYTE*)pdbfilehdr, cbPage ) ) + { + dprintf( "Failed to read from disk handle.\n" ); + goto HandleError; + } + dprintf( "Successfully read DBFILEHDR off the disk.\n" ); + if ( pdbfilehdr->le_filetype != JET_filetypeDatabase ) + { + dprintf( "\nWARNING: The read DBFILEHDR doesn't have JET_filetypeDatabase. Corruption or maybe EBC is enabled. Dumping contents anyways.\n\n" ); + } + } + else if ( !FFetchVariable( pdbfilehdrDebuggee, &pdbfilehdr ) ) + { + dprintf( "Failed to fetch DBFILEHDR memory from debugger process.\n" ); + goto HandleError; + } + + const SIZE_T dwOffset = fReadFromDisk ? 
0 : ( (BYTE *)pdbfilehdrDebuggee - (BYTE *)pdbfilehdr ); + + dprintf( "[DBFILEHDR] 0x%p bytes @ 0x%N\n", QWORD( sizeof( DBFILEHDR ) ), pdbfilehdrDebuggee ); + if ( fMemDump ) + { + (VOID)( pdbfilehdr->Dump( CPRINTFWDBG::PcprintfInstance(), dwOffset ) ); } + else + { + (VOID)( pdbfilehdr->DumpLite( CPRINTFWDBG::PcprintfInstance(), "\n", dwOffset ) ); + } + +HandleError: + + fReadFromDisk ? VirtualFree( pdbfilehdr, 0, MEM_RELEASE ) : Unfetch( pdbfilehdr ); + Unfetch( posf ); } // TrxidStack dumping From bb456a90ffb5a2ff5f6a4be07dc9738a4ed6e307 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 11 Oct 2022 15:09:13 +0000 Subject: [PATCH 054/102] ESE Block Cache: Stability: FlushAllState on dismount crashes due to previous state The FlushAllState that we now do on a prepare to dismount is accidentally touching deallocated state (TLS). This code path has been modified to clearly separate the HandleJournalFull case and the PrepareToDismount case and the latter case now doesn't touch any of the sync state used by HandleJournalFull. This should avoid the hazard. 
[Substrate:e4758bc5c839ac06d94398fbfafb1c5f97c5fce5] --- .../src/os/blockcache/_hashedlrukcache.hxx | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index e95e079f..84e4f715 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -3274,7 +3274,7 @@ class THashedLRUKCache // ask the cache to flush all its state up to the write back pointer Call( m_pjInner->ErrGetProperties( NULL, &jposReplayNew, NULL, NULL, NULL ) ); - Call( m_pc->ErrFlushAllState( jposReplayNew ) ); + Call( m_pc->ErrFlushAllState( jposReplayNew, fTrue ) ); // advance the replay pointer to the write back pointer @@ -4739,7 +4739,7 @@ class THashedLRUKCache void AsyncSlabWriteBackWorker(); void PerformOpportunisticSlabWriteBacks(); ERR ErrVerifyTruncate( _In_ const JournalPosition jposReplay ); - ERR ErrFlushAllState( _In_ const JournalPosition jposDurableForWriteBack ); + ERR ErrFlushAllState( _In_ const JournalPosition jposDurableForWriteBack, _In_ const BOOL fSaveOpenSlabs ); ERR ErrTryStartSlabWriteBacks( _In_ CArray& arrayIbSlab, _In_ const BOOL fSaveOpenSlabs ); ICachedBlockSlab* PcbsGetOpenSlabSafeForWriteBack( _In_ const QWORD ibSlab ); @@ -5348,7 +5348,7 @@ ERR THashedLRUKCache::ErrPrepareToDismount() } if ( jposReplay < jposDurableForWriteBack ) { - Call( ErrFlushAllState( jposDurableForWriteBack ) ); + Call( ErrFlushAllState( jposDurableForWriteBack, fFalse ) ); Call( m_pj->ErrTruncate( jposDurableForWriteBack ) ); Call( ErrFlush() ); } @@ -7620,20 +7620,21 @@ HandleError: } template -ERR THashedLRUKCache::ErrFlushAllState( _In_ const JournalPosition jposDurableForWriteBack ) +ERR THashedLRUKCache::ErrFlushAllState( _In_ const JournalPosition jposDurableForWriteBack, + _In_ const BOOL fSaveOpenSlabs ) { ERR err = JET_errSuccess; CHashedLRUKCacheThreadLocalStorage* pctls = NULL; BOOL fListLocked = 
fFalse; CArray arrayIbSlab; - // get our thread local storage - - Call( ErrGetThreadLocalStorage( &pctls ) ); - // register the thread that is performing the flush as suspended as well so that we can write its slabs - Call( ErrSuspendThreadFromStateAccess( m_msStateAccess.GroupActive(), pctls ) ); + if ( fSaveOpenSlabs ) + { + Call( ErrGetThreadLocalStorage( &pctls ) ); + Call( ErrSuspendThreadFromStateAccess( m_msStateAccess.GroupActive(), pctls ) ); + } // retry until all dirty slabs are written out @@ -7641,7 +7642,10 @@ ERR THashedLRUKCache::ErrFlushAllState( _In_ const JournalPosition jposDurabl { // register any effectively suspended threads - Call( ErrSuspendBlockedThreadsFromStateAccess() ); + if ( fSaveOpenSlabs ) + { + Call( ErrSuspendBlockedThreadsFromStateAccess() ); + } // get a list of all dirty slabs @@ -7674,7 +7678,7 @@ ERR THashedLRUKCache::ErrFlushAllState( _In_ const JournalPosition jposDurabl if ( arrayIbSlab.Size() > 0 ) { - Call( ErrTryStartSlabWriteBacks( arrayIbSlab, fTrue ) ); + Call( ErrTryStartSlabWriteBacks( arrayIbSlab, fSaveOpenSlabs ) ); } } From d55136d273c0766d8dafb554420b54b0fe85d433 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Wed, 12 Oct 2022 06:45:52 +0000 Subject: [PATCH 055/102] Disable Assert FaultItem type 2 with no inst [Substrate:cbedfd64545a287c198e7fe24de9ba8e491b8498] --- dev/ese/src/ese/_osu/hapublishu.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index 57eea8ef..05c4cf1c 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -26,7 +26,7 @@ void OSUHAPublishEvent_( // update: why? 
if ( pinst == NULL || pinstNil == pinst ) { - FireWall( "SkipFi2NoInst" ); + //FireWall( "SkipFi2NoInst" ); fEmit = fFalse; } From a105eedc3c6bdffefc33c648e5ec5bfeb7b90aaf Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Wed, 12 Oct 2022 16:01:11 +0000 Subject: [PATCH 056/102] ESE Block Cache: Perf: attach shouldn't consult cached file configuration if file cannot be attached If the file is not marked as "ever eligible for caching" then we don't need to check the cached file configuration. This saves repeated unnecessary checks of the config on every write which can be expensive. [Substrate:296aa28122b8bc82ea1404c7879b9d7763d3c617] --- dev/ese/src/os/blockcache/_filefilter.hxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 3bd15eed..f2b7cbfe 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -2939,7 +2939,9 @@ ERR TFileFilter::ErrBeginAccess( _In_ const COffsets& offs { ERR err = JET_errSuccess; const BOOL fNotYetAttached = !m_pcfh && !m_initOnceAttach.FIsInit(); - const BOOL fNeedsAttach = fWrite && fNotYetAttached && m_pcfconfig && m_pcfconfig->FCachingEnabled(); + const BOOL fNeedsAttach = ( fWrite && fNotYetAttached && + m_fEverEligibleForCaching && + m_pcfconfig && m_pcfconfig->FCachingEnabled() ); COffsets offsetsActual = offsets; CMeteredSection::Group group = CMeteredSection::groupInvalidNil; CSemaphore* psem = NULL; From cbaa96ab6169c1b746d87b4b19a132b2d7e8ca28 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Wed, 12 Oct 2022 16:01:45 +0000 Subject: [PATCH 057/102] ESE Block Cache: Perf: compress journal entries with LZ4 In prod, we use a significant amount of CPU compressing journal entries. We are currently using the implementation of Xpress Huffman built into the NT RTL. This change adds a mode to compress journal entries with the existing LZ4 library included in ESE. 
This library is faster and has better compression. With Xpress Huffman, our compressed journal entries had a bi-modal size of about 2200b and 3500b, as some include one slab of one chunk and others are two slabs of one chunk. This is a compression ratio of about 0.54 and 0.42. With LZ4, our compressed journal entries all end up to be about 2300b regardless of the slab count. This is a compression ratio of 0.56 and 0.28. These cases are about equal so the overall compression is better. The average size for LZ4 is about 81% of the size of Xpress Huffman. We will actually enable the use of LZ4 in a later checkin after this one saturates. We are doing this to avoid killing database copies if this build rolls back because the downlevel code won't know how to decode journal entries compressed with LZ4. [Substrate:b403edaec94ed07cdcf207a4411b54fb39fa7c87] --- dev/ese/src/CMakeLists.txt | 1 + dev/ese/src/_lz4/CMakeLists.txt | 11 + dev/ese/src/_lz4/lz4.c | 2495 +++++++++++++++++++ dev/ese/src/_lz4/lz4.h | 774 ++++++ dev/ese/src/ese/CMakeLists.txt | 1 + dev/ese/src/os/blockcache/_blockcache.hxx | 1 + dev/ese/src/os/blockcache/_journalentry.hxx | 123 +- dev/ese/src/os/litent/CMakeLists.txt | 4 + dev/ese/src/os/winnt/CMakeLists.txt | 4 + 9 files changed, 3373 insertions(+), 41 deletions(-) create mode 100644 dev/ese/src/_lz4/CMakeLists.txt create mode 100644 dev/ese/src/_lz4/lz4.c create mode 100644 dev/ese/src/_lz4/lz4.h diff --git a/dev/ese/src/CMakeLists.txt b/dev/ese/src/CMakeLists.txt index 34d91c3c..f48a637a 100644 --- a/dev/ese/src/CMakeLists.txt +++ b/dev/ese/src/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(_errstr) add_subdirectory(_esefile) add_subdirectory(_etw) +add_subdirectory(_lz4) add_subdirectory(_perfctrs) add_subdirectory(_res) add_subdirectory(_xpress) diff --git a/dev/ese/src/_lz4/CMakeLists.txt b/dev/ese/src/_lz4/CMakeLists.txt new file mode 100644 index 00000000..a4375cd1 --- /dev/null +++ b/dev/ese/src/_lz4/CMakeLists.txt @@ -0,0 +1,11 @@ +# 
Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +add_compile_definitions(_UNICODE) +add_compile_definitions(UNICODE) + +add_library(_lz4 STATIC + lz4.c) + +# Makes the headers in this directory available to consumers +target_include_directories(_lz4 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/dev/ese/src/_lz4/lz4.c b/dev/ese/src/_lz4/lz4.c new file mode 100644 index 00000000..c2f504ef --- /dev/null +++ b/dev/ese/src/_lz4/lz4.c @@ -0,0 +1,2495 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2020, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). + */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +#define LZ4_STATIC_LINKING_ONLY +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define 
LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ +#ifdef LZ4_USER_MEMORY_FUNCTIONS +/* memory management functions can be customized by user project. 
+ * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#include /* memset, memcpy */ +#define MEM_INIT(p,v,s) memset((p),(v),(s)) + + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#endif + +#define ML_BITS 4 +#define ML_MASK ((1U<=1) +# include +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + + +/*-************************************ +* Types +**************************************/ +#include +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#else +#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, 
sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. 
*/ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 8 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); + LZ4_memcpy(&v[4], v, 4); + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT) + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if 
defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. 
*/ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +#endif +# endif + } else /* 32 bits */ { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } + } +} + + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. 
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Like usingExtDict, but everything concerning the preceding + * content is in a separate context, pointed to by + * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table + * entries in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. + */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; } + + +/*-************************************ +* Internal Definitions used in Tests +**************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + +/*-****************************** +* Compression 
functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! 
*/ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType, + const BYTE* srcBase) +{ + switch (tableType) + { + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. 
+ * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } + if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } + { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType, + const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + +LZ4_FORCE_INLINE void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. 
+ */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster + * than compressing without a gap. However, compressing with + * currentOffset == 0 is faster still, so we preserve that case. + */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time. 
+ * Presumed already validated at this stage: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*) source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*) source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ? 
+ dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + /* If init conditions are not met, we don't have to mark stream + * as having dirty context, since no action was taken yet */ + if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ + if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ + assert(acceleration >= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. 
*/ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSizehashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = 
(const BYTE*)source; + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + int len = (int)(litLength - RUN_MASK); + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if 
(outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. 
*/ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 
1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun< 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); + return result; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const 
dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + + +int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + +/** + * LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). + */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) +{ + return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. 
*/ +static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); + } } +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return 
sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} + + +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + const BYTE* base; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + + /* It's necessary to reset the context, + * and not just continue it with prepareTable() + * to avoid any risk of generating overflowing matchIndex + * when compressing using this dictionary */ + LZ4_resetStream(LZ4_dict); + + /* We always increment the offset by 64 KB, since, if the dict is longer, + * we truncate it to the last 64k, and if it's shorter, we still want to + * advance by a whole window length so we can provide the guarantee that + * there are only valid offsets in the window, which allows an optimization + * in LZ4_compress_fast_continue() where it uses 
noDictIssue even when the + * dictionary isn't a full 64k. */ + dict->currentOffset += 64 KB; + + if (dictSize < (int)HASH_UNIT) { + return 0; + } + + if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; + base = dictEnd - dict->currentOffset; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + + while (p <= dictEnd-HASH_UNIT) { + LZ4_putPosition(p, dict->hashTable, tableType, base); + p+=3; + } + + return (int)dict->dictSize; +} + +void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) { + const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL : + &(dictionaryStream->internal_donotuse); + + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", + workingStream, dictionaryStream, + dictCtx != NULL ? dictCtx->dictSize : 0); + + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the + * external dictionary context, since there is no value a table + * entry can take that indicate a miss. In that case, we need + * to bump the offset to something non-zero. + */ + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } + + /* Don't actually attach an empty dictionary. 
+ */ + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; +} + + +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +{ + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; + else LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + + +int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, + const char* source, char* dest, + int inputSize, int maxOutputSize, + int acceleration) +{ + const tableType_t tableType = byU32; + LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse; + const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); + + LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ + && (dictEnd != (const BYTE*)source) ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = (const BYTE*)source; + } + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) source + inputSize; + if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { + 
streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE*)source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. 
+ */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). 
+ * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +/* Read the variable-length literal or match length. + * + * ip - pointer to use as input. + * lencheck - end ip. Return an error if ip advances >= lencheck. + * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. + * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. + * error (output) - error code. Should be set to 0 before call. 
+ */ +typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; +LZ4_FORCE_INLINE unsigned +read_variable_length(const BYTE**ip, const BYTE* lencheck, + int loop_check, int initial_check, + variable_length_error* error) +{ + U32 length = 0; + U32 s; + if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = initial_error; + return length; + } + do { + s = **ip; + (*ip)++; + length += s; + if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = loop_error; + return length; + } + } while (s==255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if ((src == NULL) || (outputSize < 0)) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? 
NULL : dictStart + dictSize; + + const int safeDecode = (endOnInput==endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + + + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if ((endOnInput) && (unlikely(outputSize==0))) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } + if ((endOnInput) && unlikely(srcSize==0)) { return -1; } + + /* Currently the fast loop shows a regression on qualcomm arm chips. */ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "skip fast decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + if (endOnInput) { assert(ip < iend); } + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow 
detection */ + + /* copy literals */ + cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if (endOnInput) { /* LZ4_decompress_safe() */ + if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } + LZ4_wildCopy32(op, ip, cpy); + } else { /* LZ4_decompress_fast() */ + if (cpy>oend-8) { goto safe_literal_copy; } + LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and only relies on end-of-block properties */ + } + ip += length; op = cpy; + } else { + cpy = op+length; + if (endOnInput) { /* LZ4_decompress_safe() */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* We don't need to check oend, since we check it once for each loop below */ + if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } + /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ + LZ4_memcpy(op, ip, 16); + } else { /* LZ4_decompress_fast() */ + /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and relies on end-of-block properties */ + LZ4_memcpy(op, ip, 8); + if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } + } + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + assert(match <= op); + + /* get matchlength */ + length = token & ML_MASK; + + if (length == ML_MASK) { + variable_length_error error = ok; + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += 
MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + + /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ + if ((dict == withPrefix64k) || (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + goto _output_error; /* end-of-block condition violated */ + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences 
where output < FASTLOOP_SAFE_DISTANCE */ + while (1) { + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (endOnInput ? length != RUN_MASK : length <= 8) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. 
*/ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + + /* copy literals */ + cpy = op+length; +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + assert(endOnInput); + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. 
+ */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence because of the parsing limitations so check + * that we exactly regenerate the original size (must be exact when !endOnInput). + */ + if ((!endOnInput) && (cpy != oend)) { goto _output_error; } + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } + } + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. 
+ */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + + _copy_match: + if (length == ML_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) goto _output_error; + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the 
block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + if (endOnInput) { + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + } else { + return (int) (((const char*)ip)-src); /* Nb of input bytes read */ + } + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. 
=====*/
+
+LZ4_FORCE_O2   /* Safe one-shot decoder: full block, bounded by compressedSize (input) and maxDecompressedSize (output). */
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest, NULL, 0);   /* no dictionary: the prefix starts at dest itself */
+}
+
+LZ4_FORCE_O2   /* Partial decoder: same input bound as above, but decoding runs in partial_decode mode. */
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+    dstCapacity = MIN(targetOutputSize, dstCapacity);   /* the output bound handed to the decoder is the smaller of the two limits */
+    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+                                  endOnInputSize, partial_decode,
+                                  noDict, (BYTE*)dst, NULL, 0);
+}
+
+LZ4_FORCE_O2   /* "Fast" decoder: no input size is supplied (0 is passed); decoding is driven by originalSize (endOnOutputSize). */
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);   /* withPrefix64k: the prefix start is placed 64 KB before dest */
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2 /* Exported, an obsolete API function. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);   /* differs from LZ4_decompress_safe() only by withPrefix64k and the prefix start */
+}
+
+/* Another obsolete API function, paired with the previous one. */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    /* LZ4_decompress_fast doesn't validate match offsets,
+     * and thus serves well with any prefixed dictionary. */
+    return LZ4_decompress_fast(source, dest, originalSize);   /* plain forwarder, arguments unchanged */
+}
+
+LZ4_FORCE_O2   /* File-local: like LZ4_decompress_safe(), with the prefix start moved prefixSize bytes before dest. */
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest-prefixSize, NULL, 0);
+}
+
+LZ4_FORCE_O2   /* Safe decoder in usingExtDict mode: the dictionary is passed via dictStart/dictSize, the prefix starts at dest. */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_O2   /* File-local endOnOutputSize counterpart of LZ4_decompress_safe_forceExtDict(): input size 0, driven by originalSize. */
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ + return lz4s; +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! 
LZ4_decoderRingBufferSize() :
+ *  when setting a ring buffer for streaming decompression (optional scenario),
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ *  Note : in a ring buffer scenario,
+ *  blocks are presumed decompressed next to each other.
+ *  When not enough space remains for next block (remainingSize < maxBlockSize),
+ *  decoding resumes from beginning of ring buffer.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    if (maxBlockSize < 0) return 0;   /* negative size: error */
+    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;   /* larger than the supported input size: error */
+    if (maxBlockSize < 16) maxBlockSize = 16;   /* small values are raised to 16 before the macro is applied */
+    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
+}
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode()
+*/
+LZ4_FORCE_O2   /* Streaming safe decoder: picks a decoding variant from the tracked prefix/extDict state, then updates that state. */
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    int result;
+
+    if (lz4sd->prefixSize == 0) {
+        /* The first call, no dictionary yet. */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (result <= 0) return result;   /* error or empty output: tracking state is left unchanged */
+        lz4sd->prefixSize = (size_t)result;   /* the block just decoded becomes the prefix */
+        lz4sd->prefixEnd = (BYTE*)dest + result;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        /* They're rolling the current segment. */
+        if (lz4sd->prefixSize >= 64 KB - 1)   /* prefix alone is at least 64 KB - 1 bytes: use the 64 KB-prefix variant */
+            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        else if (lz4sd->extDictSize == 0)   /* shorter prefix and no external dictionary recorded */
+            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                         lz4sd->prefixSize);
+        else   /* shorter prefix plus a recorded external dictionary */
+            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* tracking state is left unchanged */
+        lz4sd->prefixSize += (size_t)result;   /* output was contiguous: the prefix simply grows */
+        lz4sd->prefixEnd  += result;
+    } else {
+        /* The buffer wraps around, or they're switching to another buffer. */
+        lz4sd->extDictSize = lz4sd->prefixSize;   /* the previous prefix is re-labelled as the external dictionary */
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                                  lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* note: externalDict/extDictSize were already overwritten above */
+        lz4sd->prefixSize = (size_t)result;   /* a new prefix starts at dest */
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+LZ4_FORCE_O2   /* Streaming "fast" decoder: same three cases as the safe variant, but the state advances by originalSize. */
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    int result;
+    assert(originalSize >= 0);
+
+    if (lz4sd->prefixSize == 0) {   /* first block: nothing tracked yet */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_fast(source, dest, originalSize);
+        if (result <= 0) return result;   /* tracking state is left unchanged */
+        lz4sd->prefixSize = (size_t)originalSize;
+        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {   /* dest continues right after the previous output */
+        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)   /* large prefix, or no external dictionary recorded: plain fast decoder */
+            result = LZ4_decompress_fast(source, dest, originalSize);
+        else
+            result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* tracking state is left unchanged */
+        lz4sd->prefixSize += (size_t)originalSize;
+        lz4sd->prefixEnd  += originalSize;
+    } else {   /* dest is somewhere else: previous prefix becomes the external dictionary */
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+                                             lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;   /* note: externalDict/extDictSize were already overwritten above */
+        lz4sd->prefixSize = (size_t)originalSize;
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    the dictionary must be explicitly provided within parameters
+*/
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{   /* Stateless: the decoder variant is chosen from dictSize and from where the dictionary ends relative to dest. */
+    if (dictSize==0)   /* no dictionary */
+        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+    if (dictStart+dictSize == dest) {   /* dictionary ends exactly where dest begins: handled as a prefix */
+        if (dictSize >= 64 KB - 1) {
+            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        }
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+    }
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);   /* dictionary located elsewhere */
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{   /* Stateless "fast" counterpart of the function above. */
+    if (dictSize==0 || dictStart+dictSize == dest)   /* no dictionary, or dictionary directly in front of dest */
+        return LZ4_decompress_fast(source, dest, originalSize);
+    assert(dictSize >= 0);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
+}
+
+
+/*=*************************************************
+*  Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{   /* forwards all four arguments unchanged */
+    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+}
+int LZ4_compress(const char* src, char* dest, int srcSize)
+{   /* no capacity parameter: LZ4_compressBound(srcSize) is passed as the size of dest */
+    return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{   /* last argument (acceleration) is fixed to 1 */
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{   /* LZ4_compressBound(srcSize) is passed as the size of dst; acceleration fixed to 1 */
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{   /* last argument (acceleration) is fixed to 1 */
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{   /* LZ4_compressBound(inputSize) is passed as the size of dest; acceleration fixed to 1 */
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
+}
+
+/*
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{   /* forwards all three arguments unchanged */
+    return LZ4_decompress_fast(source, dest, outputSize);
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{   /* isize is passed as the compressed size */
+    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+}
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }   /* reports the LZ4_STREAMSIZE constant */
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{
+    (void)inputBuffer;   /* unused */
+    LZ4_resetStream((LZ4_stream_t*)state);   /* state is treated as an LZ4_stream_t */
+    return 0;   /* always 0 */
+}
+
+void* LZ4_create (char* inputBuffer)
+{
+    (void)inputBuffer;   /* unused */
+    return LZ4_createStream();   /* the new stream is handed back as an untyped pointer */
+}
+
+char* LZ4_slideInputBuffer (void* state)
+{   /* Only returns the stream's current dictionary pointer; no data is moved here. */
+    /* avoid const char * -> char * conversion warning */
+    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
+}
+
+#endif   /* LZ4_COMMONDEFS_ONLY */
diff --git a/dev/ese/src/_lz4/lz4.h b/dev/ese/src/_lz4/lz4.h
new file mode 100644
index 00000000..0b11eab0
--- /dev/null
+++ b/dev/ese/src/_lz4/lz4.h
@@ -0,0 +1,774 @@
+/*
+ *  LZ4 - Fast LZ compression algorithm
+ *  Header File
+ *  Copyright (C) 2011-2020, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
+  scalable with multi-core CPUs. It features an extremely fast decoder, with speed in
+  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+  The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to the user.
+  Compression can be done in:
+    - a single step (described as Simple Functions)
+    - a single step, reusing a context (described in Advanced Functions)
+    - unbounded multiple steps (described as Streaming compression)
+
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing such a compressed block requires additional metadata.
+  Exact metadata depends on exact decompression function.
+ For the typical case of LZ4_decompress_safe(), + metadata includes block's compressed size, and maximum bound of decompressed size. + Each application is free to encode and pass such metadata in whichever way it wants. + + lz4.h only handle blocks, it can not generate Frames. + + Blocks are different from Frames (doc/lz4_Frame_format.md). + Frames bundle both blocks and metadata in a specified manner. + Embedding metadata is required for compressed data to be self-contained and portable. + Frame format is delivered through a companion API, declared in lz4frame.h. + The `lz4` CLI can only manage frames. +*/ + +/*^*************************************************************** +* Export parameters +*****************************************************************/ +/* +* LZ4_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +* LZ4LIB_VISIBILITY : +* Control library symbols visibility. +*/ +#ifndef LZ4LIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4LIB_VISIBILITY +# endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) +# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) +# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define LZ4LIB_API LZ4LIB_VISIBILITY +#endif + +/*------ Version ------*/ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ + +#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) + +#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +#define LZ4_QUOTE(str) #str 
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) +#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) + +LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */ +LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */ + + +/*-************************************ +* Tuning parameter +**************************************/ +/*! + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio. + * Reduced memory usage may improve speed, thanks to better cache locality. + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#ifndef LZ4_MEMORY_USAGE +# define LZ4_MEMORY_USAGE 14 +#endif + + +/*-************************************ +* Simple Functions +**************************************/ +/*! LZ4_compress_default() : + * Compresses 'srcSize' bytes from buffer 'src' + * into already allocated 'dst' buffer of size 'dstCapacity'. + * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'src' into a more limited 'dst' budget, + * compression stops *immediately*, and the function result is zero. + * In which case, 'dst' content is undefined (invalid). + * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. + * dstCapacity : size of buffer 'dst' (which must be already allocated) + * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails + * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). + */ +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); + +/*! 
LZ4_decompress_safe() : + * compressedSize : is the exact complete size of the compressed block. + * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size. + * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). + * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * Note 1 : This function is protected against malicious data packets : + * it will never writes outside 'dst' buffer, nor read outside 'source' buffer, + * even if the compressed block is maliciously modified to order the decoder to do these actions. + * In such case, the decoder stops immediately, and considers the compressed block malformed. + * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them. + * The implementation is free to send / store / derive this information in whichever way is most beneficial. + * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. + */ +LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); + + +/*-************************************ +* Advanced Functions +**************************************/ +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/*! LZ4_compressBound() : + Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) + This function is primarily useful for memory allocation purposes (destination buffer size). 
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). + Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) + inputSize : max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is incorrect (too large or negative) +*/ +LZ4LIB_API int LZ4_compressBound(int inputSize); + +/*! LZ4_compress_fast() : + Same as LZ4_compress_default(), but allows selection of "acceleration" factor. + The larger the acceleration value, the faster the algorithm, but also the lesser the compression. + It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. + An acceleration value of "1" is the same as regular LZ4_compress_default() + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). +*/ +LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_fast_extState() : + * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. + * Use LZ4_sizeofState() to know how much memory must be allocated, + * and allocate it on 8-bytes boundaries (using `malloc()` typically). + * Then, provide this buffer as `void* state` to compression function. + */ +LZ4LIB_API int LZ4_sizeofState(void); +LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. 
+ *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ *  or fills 'dst' buffer completely with as much data as possible from 'src'.
+ *  note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ *               New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
+ *           or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ *           If source stream is detected malformed, function returns a negative result.
+ *
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ * + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. + */ +LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); + + +/*-********************************************* +* Streaming Compression Functions +***********************************************/ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ + +LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); +LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); + +/*! LZ4_resetStream_fast() : v1.9.0+ + * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks + * (e.g., LZ4_compress_fast_continue()). + * + * An LZ4_stream_t must be initialized once before usage. + * This is automatically done when created by LZ4_createStream(). + * However, should the LZ4_stream_t be simply declared on stack (for example), + * it's necessary to initialize it first, using LZ4_initStream(). 
+ * + * After init, start any new stream with LZ4_resetStream_fast(). + * The same LZ4_stream_t can be re-used multiple times consecutively + * and compress multiple streams, + * provided that it starts each new stream with LZ4_resetStream_fast(). + * + * LZ4_resetStream_fast() is much faster than LZ4_initStream(), + * but is not compatible with memory regions containing garbage data. + * + * Note: it's only useful to call LZ4_resetStream_fast() + * in the context of streaming compression. + * The *extState* functions perform their own resets. + * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. + */ +LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); + +/*! LZ4_loadDict() : + * Use this function to reference a static dictionary into LZ4_stream_t. + * The dictionary must remain available during compression. + * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. + * The same dictionary will have to be loaded on decompression side for successful decoding. + * Dictionaries are useful for better compression of small data (KB range). + * While LZ4 accepts any input as dictionary, + * results are generally better when using Zstandard's Dictionary Builder. + * Loading a size of 0 is allowed, and is the same as reset. + * @return : loaded dictionary size, in bytes (necessarily <= 64 KB) + */ +LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_compress_fast_continue() : + * Compress 'src' content using data from previously compressed blocks, for better compression ratio. + * 'dst' buffer must be already allocated. + * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. + * + * @return : size of compressed block + * or 0 if there is an error (typically, cannot fit into 'dst'). + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. 
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. + */ +LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_saveDict() : + * If last 64KB data cannot be guaranteed to remain available at its current memory location, + * save it into a safer place (char* safeBuffer). + * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), + * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. + * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. + */ +LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); + + +/*-********************************************** +* Streaming Decompression Functions +* Bufferless synchronous API +************************************************/ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ + +/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. 
+ */ +LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); + +/*! LZ4_setStreamDecode() : + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. + * Use this function to start decompression of a new stream of blocks. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. + * @return : 1 if OK, 0 if error + */ +LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/*! LZ4_decoderRingBufferSize() : v1.8.2+ + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ + +/*! LZ4_decompress_*_continue() : + * These decoding functions allow decompression of consecutive blocks in "streaming" mode. + * A block is an unsplittable entity, it must be presented entirely to a decompression function. + * Decompression functions only accept one block at a time. + * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. + * If less than 64KB of data has been decoded, all the data must be present. 
+ * + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. +*/ +LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); + + +/*! LZ4_decompress_*_usingDict() : + * These decoding functions work the same as + * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() + * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. 
+ * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. + * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef LZ4_STATIC_3504398509 +#define LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +#define LZ4LIB_STATIC_API LZ4LIB_API +#else +#define LZ4LIB_STATIC_API +#endif + + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). 
+ * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_attach_dictionary() : + * This is an experimental API that allows + * efficient use of a static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDict() should + * be expected to work. + * + * Alternatively, the provided dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the completion of the first compression call on the stream. + */ +LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream); + + +/*! In-place compression and decompression + * + * It's possible to have input and output sharing the same buffer, + * for highly constrained memory environments. 
+ * In both cases, it requires input to lay at the end of the buffer, + * and decompression to start at beginning of the buffer. + * Buffer size must feature some margin, hence be larger than final size. + * + * |<------------------------buffer--------------------------------->| + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). 
+ * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. + * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + + + +#ifndef LZ4_H_98237428734687 +#define LZ4_H_98237428734687 + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. 
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library. + **************************************************************/ +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ + +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef int8_t LZ4_i8; + typedef uint8_t LZ4_byte; + typedef uint16_t LZ4_u16; + typedef uint32_t LZ4_u32; +#else + typedef signed char LZ4_i8; + typedef unsigned char LZ4_byte; + typedef unsigned short LZ4_u16; + typedef unsigned int LZ4_u32; +#endif + +typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; +struct LZ4_stream_t_internal { + LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; + LZ4_u32 currentOffset; + LZ4_u32 tableType; + const LZ4_byte* dictionary; + const LZ4_stream_t_internal* dictCtx; + LZ4_u32 dictSize; +}; + +typedef struct { + const LZ4_byte* externalDict; + size_t extDictSize; + const LZ4_byte* prefixEnd; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + + +/*! LZ4_stream_t : + * Do not use below internal definitions directly ! + * Declare or allocate an LZ4_stream_t instead. + * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended. + * The structure definition can be convenient for static allocation + * (on stack, or as part of larger structure). + * Init this structure with LZ4_initStream() before first use. + * note : only use this definition in association with static linking ! + * this definition is not API/ABI safe, and may change in future versions. 
+ */ +#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ +#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) +union LZ4_stream_u { + void* table[LZ4_STREAMSIZE_VOIDP]; + LZ4_stream_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_stream_t */ + + +/*! LZ4_initStream() : v1.9.0+ + * An LZ4_stream_t structure must be initialized at least once. + * This is automatically done when invoking LZ4_createStream(), + * but it's not when the structure is simply declared on stack (for example). + * + * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. + * It can also initialize any arbitrary buffer of sufficient size, + * and will @return a pointer of proper type upon initialization. + * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead + */ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); + + +/*! LZ4_streamDecode_t : + * information structure to track an LZ4 stream during decompression. + * init this structure using LZ4_setStreamDecode() before first use. + * note : only use in association with static linking ! + * this definition is not API/ABI safe, + * and may change in a future version ! + */ +#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) +#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) +union LZ4_streamDecode_u { + unsigned long long table[LZ4_STREAMDECODESIZE_U64]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. 
+ * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! 
Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. 
+ */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. 
+ * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. 
+ */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + + +#endif /* LZ4_H_98237428734687 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/dev/ese/src/ese/CMakeLists.txt b/dev/ese/src/ese/CMakeLists.txt index 006be537..fb0f0af4 100644 --- a/dev/ese/src/ese/CMakeLists.txt +++ b/dev/ese/src/ese/CMakeLists.txt @@ -88,6 +88,7 @@ set(ESE_LIBRARIES dht sync _errstr + _lz4 _xpress _perfctrs _log diff --git a/dev/ese/src/os/blockcache/_blockcache.hxx b/dev/ese/src/os/blockcache/_blockcache.hxx index 640658a1..e895f131 100644 --- a/dev/ese/src/os/blockcache/_blockcache.hxx +++ b/dev/ese/src/os/blockcache/_blockcache.hxx @@ -5,6 +5,7 @@ #include "tcconst.hxx" #include "checksum.hxx" +#include "lz4.h" #ifdef ESENT #include "jetmsg.h" diff --git a/dev/ese/src/os/blockcache/_journalentry.hxx b/dev/ese/src/os/blockcache/_journalentry.hxx index 91568477..b95e1570 100644 --- a/dev/ese/src/os/blockcache/_journalentry.hxx +++ b/dev/ese/src/os/blockcache/_journalentry.hxx @@ -182,10 +182,12 @@ class TCompressedJournalEntry : public TJournalEntryBase // { caInvalid = 0, caLegacyXpressHuffman = 1, + caLz4 = 2, }; static const CompressionAlgorithm caInvalid = CompressionAlgorithm::caInvalid; static const CompressionAlgorithm caLegacyXpressHuffman = CompressionAlgorithm::caLegacyXpressHuffman; + static const CompressionAlgorithm caLz4 = CompressionAlgorithm::caLz4; TCompressedJournalEntry( _In_ const TJournalEntry* const pje, _In_ const CompressionAlgorithm ca, @@ -214,7 +216,6 @@ INLINE ERR TCompressedJournalEntry::ErrCreate( _In_ cons ERR err = JET_errSuccess; void* pv = NULL; CompressionAlgorithm ca = caInvalid; - USHORT compressionFormat = s_rgusCompressionFormats[ (int)caInvalid ]; NTSTATUS status = 0; ULONG cbWorkspace = 0; ULONG cbUnused = 0; @@ -232,33 +233,55 @@ INLINE ERR TCompressedJournalEntry::ErrCreate( _In_ cons // determine our compression algorithm ca = caLegacyXpressHuffman; - compressionFormat = s_rgusCompressionFormats[ 
(int)(CompressionAlgorithm)ca ]; - // allocate our workspace for compression + // compress the data - status = g_pfnRtlGetCompressionWorkSpaceSize( compressionFormat, &cbWorkspace, &cbUnused ); - if ( status >= 0 ) + if ( ca == caLz4 ) { - Alloc( rgbWorkspace = new BYTE[ cbWorkspace ] ); - } + cbCompressed = LZ4_compress_default( (char*)pje, + (char*)((CCompressedJournalEntry*)pv)->m_rgbCompressed, + pje->Cb(), + pje->Cb() ); - // try to compress the journal entry + // determine if we successfully compressed the journal entry - if ( status >= 0 ) + fCompressed = fCompressed && cbCompressed > 0; + } + else if ( ca == caLegacyXpressHuffman ) { - status = g_pfnRtlCompressBuffer( compressionFormat, - (PUCHAR)pje, - pje->Cb(), - (PUCHAR)((CCompressedJournalEntry*)pv)->m_rgbCompressed, - pje->Cb(), - 4096, - &cbCompressed, - rgbWorkspace ); + // determine our compression format + + const USHORT compressionFormat = s_rgusCompressionFormats[ (int)(CompressionAlgorithm)ca ]; + + // allocate our workspace for compression + + status = g_pfnRtlGetCompressionWorkSpaceSize( compressionFormat, &cbWorkspace, &cbUnused ); + if ( status >= 0 ) + { + Alloc( rgbWorkspace = new BYTE[ cbWorkspace ] ); + } + + // try to compress the journal entry + + if ( status >= 0 ) + { + status = g_pfnRtlCompressBuffer( compressionFormat, + (PUCHAR)pje, + pje->Cb(), + (PUCHAR)((CCompressedJournalEntry*)pv)->m_rgbCompressed, + pje->Cb(), + 4096, + &cbCompressed, + rgbWorkspace ); + } + + // determine if we successfully compressed the journal entry + + fCompressed = fCompressed && status >= 0; } // determine if we successfully compressed the journal entry - fCompressed = fCompressed && status >= 0; fCompressed = fCompressed && sizeof( CCompressedJournalEntry ) + cbCompressed < pje->Cb(); fCompressed = fCompressed && pje->Jetyp() != JETYPCOMPRESSED; @@ -303,7 +326,6 @@ INLINE ERR TCompressedJournalEntry::ErrExtract( _In_ cons ERR err = JET_errSuccess; const CCompressedJournalEntry* const pcje = (const 
CCompressedJournalEntry*)jb.Rgb(); void* pv = NULL; - USHORT compressionFormat = s_rgusCompressionFormats[ (int)caInvalid ]; NTSTATUS status = 0; ULONG cbWorkspace = 0; ULONG cbUnused = 0; @@ -327,11 +349,24 @@ INLINE ERR TCompressedJournalEntry::ErrExtract( _In_ cons // decompress based on the algorithm - if ( pcje->m_le_ca == caLegacyXpressHuffman ) + if ( pcje->m_le_ca == caLz4 ) + { + cbUncompressed = LZ4_decompress_safe_partial( (char*)pcje->m_rgbCompressed, + (char*)pv, + pcje->CbCompressed(), + pcje->m_le_cbUncompressed, + pcje->m_le_cbUncompressed ); + + if ( cbUncompressed <= 0 ) + { + Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryDecompressionFailure" ) ); + } + } + else if ( pcje->m_le_ca == caLegacyXpressHuffman ) { - // determine our compression algorithm + // determine our compression format - compressionFormat = s_rgusCompressionFormats[ (int)(CompressionAlgorithm)pcje->m_le_ca ]; + const USHORT compressionFormat = s_rgusCompressionFormats[ (int)(CompressionAlgorithm)pcje->m_le_ca ]; // allocate our workspace for compression @@ -353,29 +388,32 @@ INLINE ERR TCompressedJournalEntry::ErrExtract( _In_ cons { Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryDecompressionFailure" ) ); } - if ( cbUncompressed != pcje->m_le_cbUncompressed ) - { - Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntrySizeMismatch" ) ); - } - if ( Crc32Checksum( (const BYTE*)pv, pcje->m_le_cbUncompressed ) != pcje->m_le_crc32Uncompressed ) - { - Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryChecksumMismatch" ) ); - } - Call( ErrValidate( CJournalBuffer( pcje->m_le_cbUncompressed, (const BYTE*)pv ) ) ); - if ( ((const TJournalEntry*)pv)->Jetyp() == JETYPCOMPRESSED ) - { - Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryTypeMismatch" ) ); - } - - // get the journal entry - - pje = (const TJournalEntry*)pv; - pv = NULL; } else { Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryUnknownAlgorithm" 
) ); } + + // validate the decompressed journal entry + + if ( cbUncompressed != pcje->m_le_cbUncompressed ) + { + Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntrySizeMismatch" ) ); + } + if ( Crc32Checksum( (const BYTE*)pv, pcje->m_le_cbUncompressed ) != pcje->m_le_crc32Uncompressed ) + { + Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryChecksumMismatch" ) ); + } + Call( ErrValidate( CJournalBuffer( pcje->m_le_cbUncompressed, (const BYTE*)pv ) ) ); + if ( ( ( const TJournalEntry* )pv )->Jetyp() == JETYPCOMPRESSED ) + { + Error( ErrBlockCacheInternalError( L"", "CompressedJournalEntryTypeMismatch" ) ); + } + + // get the journal entry + + pje = ( const TJournalEntry* )pv; + pv = NULL; } // if this is any other type of journal entry then just copy it @@ -419,10 +457,13 @@ template USHORT TCompressedJournalEntry::s_rgusCompressionFormats[] = { // caInvalid - COMPRESSION_FORMAT_NONE, + NULL, // caLegacyXpressHuffman COMPRESSION_FORMAT_XPRESS_HUFF | COMPRESSION_ENGINE_STANDARD, + + // caLz4 + NULL, }; diff --git a/dev/ese/src/os/litent/CMakeLists.txt b/dev/ese/src/os/litent/CMakeLists.txt index 4d196cfe..026c5c0b 100644 --- a/dev/ese/src/os/litent/CMakeLists.txt +++ b/dev/ese/src/os/litent/CMakeLists.txt @@ -36,4 +36,8 @@ target_precompile_headers(oslite PRIVATE target_include_directories(oslite PRIVATE ./ +) + +target_link_libraries(oslite PRIVATE + _lz4 ) \ No newline at end of file diff --git a/dev/ese/src/os/winnt/CMakeLists.txt b/dev/ese/src/os/winnt/CMakeLists.txt index 411744ac..56a3dfc4 100644 --- a/dev/ese/src/os/winnt/CMakeLists.txt +++ b/dev/ese/src/os/winnt/CMakeLists.txt @@ -36,4 +36,8 @@ target_compile_definitions(oswinnt PRIVATE target_include_directories(oswinnt PRIVATE ${ESE_DEV}/src/inc/_osu/ ./ +) + +target_link_libraries(oswinnt PRIVATE + _lz4 ) \ No newline at end of file From 0cfec2ad410952156af84ec8fd09ae94969d3482 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Wed, 12 Oct 2022 16:03:24 +0000 Subject: [PATCH 
058/102] ESE Block Cache: Perf: Extend IFileAPI::ErrFlushFileBuffers with FileFlushMode to enable data-only flushes In prod, we are spending a lot of CPU and IO repeatedly flushing the NTFS journal whenever we call ErrFlushFileBuffers on the Caching File or the Cached File. FlushFileBuffers will cause NTFS to write out the latest state of the file's metadata even if the update is trivial (e.g. Last Access Time). This is pointless for the EBC scenario. It is also the only cost here: the actual SYNC command is short-circuited because the write cache is disabled on the storage. The lowest risk fix for this is to use the new (to Win10 RS1) flag to NtFlushBuffersFileEx called FLUSH_FLAGS_FILE_DATA_SYNC_ONLY. This flag causes any file data to be flushed but skips metadata changes and also causes a SYNC command to be sent when needed. This flag is implemented by the OS in such a way that if it isn't recognized then we fall back to the legacy behavior. IFileAPI::ErrFlushFileBuffers now takes a FileFlushMode with two settings: ffmAll (the current behavior) and ffmDataOnly (the new behavior). EBC has been modified to use the new ffmDataOnly mode. 
This API is documented here: https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/nf-ntifs-ntflushbuffersfileex [Substrate:c0d4e863da254a2502d1187b42749b80c4e96811] --- dev/ese/published/inc/os/memfile.hxx | 2 +- dev/ese/published/inc/os/osblockcache.hxx | 4 +- dev/ese/published/inc/os/osfileapi.hxx | 33 +++++++++--- .../blockcache/interop/CFileFilterWrapper.h | 8 ++- .../noncore/blockcache/interop/CFileWrapper.h | 22 ++++---- .../src/noncore/blockcache/interop/FileBase.h | 22 ++++---- .../blockcache/interop/FileFilterBase.h | 8 +-- .../blockcache/interop/FileFilterRemotable.h | 4 +- .../blockcache/interop/FileRemotable.h | 14 ++--- .../src/noncore/blockcache/interop/IFile.h | 17 +++--- .../noncore/blockcache/interop/IFileFilter.h | 3 +- .../src/noncore/blockcache/interop/Stdafx.h | 1 + dev/ese/src/os/_osfile.hxx | 5 +- dev/ese/src/os/blockcache/_cachefactory.hxx | 2 +- dev/ese/src/os/blockcache/_filefilter.hxx | 20 ++++--- .../src/os/blockcache/_filefilterwrapper.hxx | 16 +++--- dev/ese/src/os/blockcache/_filewrapper.hxx | 21 ++++---- dev/ese/src/os/blockcache/_fsfilter.hxx | 2 +- .../src/os/blockcache/_hashedlrukcache.hxx | 16 +++--- .../_hashedlrukcachewritecountsmanager.hxx | 2 +- .../os/blockcache/_journalsegmentmanager.hxx | 2 +- dev/ese/src/os/osfile.cxx | 52 +++++++++++++++++-- 22 files changed, 179 insertions(+), 97 deletions(-) diff --git a/dev/ese/published/inc/os/memfile.hxx b/dev/ese/published/inc/os/memfile.hxx index 521ec8e8..efc3676f 100644 --- a/dev/ese/published/inc/os/memfile.hxx +++ b/dev/ese/published/inc/os/memfile.hxx @@ -40,7 +40,7 @@ public: ERR ErrRetrieveAllocatedRegion( const QWORD ibOffsetToQuery, _Out_ QWORD* const pibStartTrimmedRegion, _Out_ QWORD* const pcbTrimmed ) override { AssertSz( fFalse, "NYI!" ); return ErrERRCheck( JET_errNyi ); } - ERR ErrFlushFileBuffers( const IOFLUSHREASON iofr ) override { AssertSz( fFalse, "NYI!" 
); return ErrERRCheck( JET_errNyi ); } + ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) override { AssertSz( fFalse, "NYI!" ); return ErrERRCheck( JET_errNyi ); } void SetNoFlushNeeded() override { AssertSz( fFalse, "NYI!" ); } ERR ErrIOSize( DWORD* const pcbSize ) override { AssertSz( fFalse, "NYI!" ); return ErrERRCheck( JET_errNyi ); } ERR ErrSectorSize( DWORD* const pcbSize ) override { AssertSz( fFalse, "NYI!" ); return ErrERRCheck( JET_errNyi ); } diff --git a/dev/ese/published/inc/os/osblockcache.hxx b/dev/ese/published/inc/os/osblockcache.hxx index c369ca10..7765ece9 100644 --- a/dev/ese/published/inc/os/osblockcache.hxx +++ b/dev/ese/published/inc/os/osblockcache.hxx @@ -155,7 +155,9 @@ class IFileFilter // ff // Flushes all data previously written for the current file. - virtual ERR ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFilter::IOMode iom ) = 0; + virtual ERR ErrFlush( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, + _In_ const IFileFilter::IOMode iom ) = 0; }; constexpr IFileFilter::IOMode iomRaw = IFileFilter::IOMode::iomRaw; diff --git a/dev/ese/published/inc/os/osfileapi.hxx b/dev/ese/published/inc/os/osfileapi.hxx index e20080fe..63fce7cc 100644 --- a/dev/ese/published/inc/os/osfileapi.hxx +++ b/dev/ese/published/inc/os/osfileapi.hxx @@ -357,7 +357,29 @@ class IFileAPI // fapi _Out_ QWORD* const pibStartTrimmedRegion, _Out_ QWORD* const pcbTrimmed ) = 0; - virtual ERR ErrFlushFileBuffers( const IOFLUSHREASON iofr ) = 0; + // Flush + + // Flags indicating the portion of a file's metadata and/or data to flush. + + enum class FileFlushMode // ffm + { + ffmAll = 0, // Flush the file's metadata and data. + ffmDataOnly = 1, // Flush the file's data only. + }; + + // Synchronously flushes the requested portion of the file's metadata and/or data. 
+ + virtual ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm = IFileAPI::FileFlushMode::ffmAll ) = 0; + + // Returns the number of Write IOs that are unflushed or flush pending + // since the last ErrFlushFileBuffers call. + + virtual LONG64 CioNonFlushed() const = 0; + + // Indicates that a flush is not required for any Write IOs that are unflushed or + // flush pending since the last ErrFlushFileBuffers call. + virtual void SetNoFlushNeeded() = 0; // I/O @@ -517,11 +539,6 @@ class IFileAPI // fapi virtual ERR ErrDiskId( ULONG_PTR* const pulDiskId ) const = 0; - // get number of Write IOs that are unflushed or flush pending - // since last ErrFlushFileBuffers call - - virtual LONG64 CioNonFlushed() const = 0; - // get seek penalty (in order to identify SSD) virtual BOOL FSeekPenalty() const = 0; @@ -538,6 +555,10 @@ class IFileAPI // fapi DEFINE_ENUM_FLAG_OPERATORS_BASIC( IFileAPI::FileModeFlags ) +constexpr IFileAPI::FileFlushMode ffmAll = IFileAPI::FileFlushMode::ffmAll; +constexpr IFileAPI::FileFlushMode ffmDataOnly = IFileAPI::FileFlushMode::ffmDataOnly; + + // Exposing for log zero filling extern QWORD g_cbZero; extern BYTE* g_rgbZero; diff --git a/dev/ese/src/noncore/blockcache/interop/CFileFilterWrapper.h b/dev/ese/src/noncore/blockcache/interop/CFileFilterWrapper.h index ae62df9d..0fdb3182 100644 --- a/dev/ese/src/noncore/blockcache/interop/CFileFilterWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CFileFilterWrapper.h @@ -49,7 +49,9 @@ namespace Internal ERR ErrIssue( _In_ const ::IFileFilter::IOMode iom ) override; - ERR ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const ::IFileFilter::IOMode iom ) override; + ERR ErrFlush( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, + _In_ const ::IFileFilter::IOMode iom ) override; }; template< class TM, class TN > @@ -188,11 +190,13 @@ namespace Internal template< class TM, class TN > inline ERR CFileFilterWrapper::ErrFlush( _In_ 
const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, _In_ const ::IFileFilter::IOMode iom ) { ERR err = JET_errSuccess; - ExCall( I()->Flush( (Internal::Ese::BlockCache::Interop::IOMode)iom ) ); + ExCall( I()->Flush( (Internal::Ese::BlockCache::Interop::FileFlushMode)ffm, + (Internal::Ese::BlockCache::Interop::IOMode)iom ) ); HandleError: return err; diff --git a/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h b/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h index 3f594038..f88f1e66 100644 --- a/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CFileWrapper.h @@ -24,7 +24,8 @@ namespace Internal IFileAPI::FileModeFlags Fmf() const override; - ERR ErrFlushFileBuffers( const IOFLUSHREASON iofr ) override; + ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) override; + LONG64 CioNonFlushed() const override; void SetNoFlushNeeded() override; ERR ErrPath( _Out_bytecap_c_(cbOSFSAPI_MAX_PATHW) WCHAR* const wszAbsPath ) override; @@ -105,8 +106,6 @@ namespace Internal ERR ErrDiskId( ULONG_PTR* const pulDiskId ) const override; - LONG64 CioNonFlushed() const override; - BOOL FSeekPenalty() const override; #ifdef DEBUG @@ -124,16 +123,23 @@ namespace Internal } template< class TM, class TN > - inline ERR CFileWrapper::ErrFlushFileBuffers( const IOFLUSHREASON iofr ) + inline ERR CFileWrapper::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm ) { ERR err = JET_errSuccess; - ExCall( I()->FlushFileBuffers() ); + ExCall( I()->FlushFileBuffers( (Internal::Ese::BlockCache::Interop::FileFlushMode)ffm ) ); HandleError: return err; } + template< class TM, class TN > + inline LONG64 CFileWrapper::CioNonFlushed() const + { + return I()->CountIoNonFlushed(); + } + template< class TM, class TN > inline void CFileWrapper::SetNoFlushNeeded() { @@ -528,12 +534,6 @@ namespace Internal return err; } - template< class TM, class TN 
> - inline LONG64 CFileWrapper::CioNonFlushed() const - { - return I()->CountIoNonFlushed(); - } - template< class TM, class TN > inline BOOL CFileWrapper::FSeekPenalty() const { diff --git a/dev/ese/src/noncore/blockcache/interop/FileBase.h b/dev/ese/src/noncore/blockcache/interop/FileBase.h index 96712b2e..f086b9ad 100644 --- a/dev/ese/src/noncore/blockcache/interop/FileBase.h +++ b/dev/ese/src/noncore/blockcache/interop/FileBase.h @@ -28,7 +28,9 @@ namespace Internal virtual FileModeFlags FileModeFlags(); - virtual void FlushFileBuffers(); + virtual void FlushFileBuffers( FileFlushMode fileFlushMode ); + + virtual Int64 CountIoNonFlushed(); virtual void SetNoFlushNeeded(); @@ -77,8 +79,6 @@ namespace Internal virtual IntPtr DiskId(); - virtual Int64 CountIoNonFlushed(); - virtual bool SeekPenalty(); }; @@ -89,11 +89,11 @@ namespace Internal } template< class TM, class TN, class TW > - inline void FileBase::FlushFileBuffers() + inline void FileBase::FlushFileBuffers( FileFlushMode fileFlushMode ) { ERR err = JET_errSuccess; - Call( Pi->ErrFlushFileBuffers( (IOFLUSHREASON)0 ) ); + Call( Pi->ErrFlushFileBuffers( (IOFLUSHREASON)0, (IFileAPI::FileFlushMode)fileFlushMode ) ); return; @@ -101,6 +101,12 @@ namespace Internal throw EseException( err ); } + template< class TM, class TN, class TW > + inline Int64 FileBase::CountIoNonFlushed() + { + return Pi->CioNonFlushed(); + } + template< class TM, class TN, class TW > inline void FileBase::SetNoFlushNeeded() { @@ -415,12 +421,6 @@ namespace Internal throw EseException( err ); } - template< class TM, class TN, class TW > - inline Int64 FileBase::CountIoNonFlushed() - { - return Pi->CioNonFlushed(); - } - template< class TM, class TN, class TW > inline bool FileBase::SeekPenalty() { diff --git a/dev/ese/src/noncore/blockcache/interop/FileFilterBase.h b/dev/ese/src/noncore/blockcache/interop/FileFilterBase.h index 0e29a5e2..6a8788b2 100644 --- a/dev/ese/src/noncore/blockcache/interop/FileFilterBase.h +++ 
b/dev/ese/src/noncore/blockcache/interop/FileFilterBase.h @@ -47,7 +47,7 @@ namespace Internal virtual void Issue( IOMode ioMode ); - virtual void Flush( IOMode ioMode ); + virtual void Flush( FileFlushMode fileFlushMode, IOMode ioMode ); }; template< class TM, class TN, class TW > @@ -205,11 +205,13 @@ namespace Internal } template< class TM, class TN, class TW > - inline void FileFilterBase::Flush( IOMode ioMode ) + inline void FileFilterBase::Flush( FileFlushMode fileFlushMode, IOMode ioMode ) { ERR err = JET_errSuccess; - Call( Pi->ErrFlush( (IOFLUSHREASON)0, (::IFileFilter::IOMode)ioMode ) ); + Call( Pi->ErrFlush( (IOFLUSHREASON)0, + (IFileAPI::FileFlushMode)fileFlushMode, + (::IFileFilter::IOMode)ioMode ) ); return; diff --git a/dev/ese/src/noncore/blockcache/interop/FileFilterRemotable.h b/dev/ese/src/noncore/blockcache/interop/FileFilterRemotable.h index 074efffc..242e7b8a 100644 --- a/dev/ese/src/noncore/blockcache/interop/FileFilterRemotable.h +++ b/dev/ese/src/noncore/blockcache/interop/FileFilterRemotable.h @@ -62,9 +62,9 @@ namespace Internal this->target->Issue( ioMode ); } - virtual void Flush( IOMode ioMode ) + virtual void Flush( FileFlushMode fileFlushMode, IOMode ioMode ) { - this->target->Flush( ioMode ); + this->target->Flush( fileFlushMode, ioMode ); } private: diff --git a/dev/ese/src/noncore/blockcache/interop/FileRemotable.h b/dev/ese/src/noncore/blockcache/interop/FileRemotable.h index efa8c680..a7bf2971 100644 --- a/dev/ese/src/noncore/blockcache/interop/FileRemotable.h +++ b/dev/ese/src/noncore/blockcache/interop/FileRemotable.h @@ -31,9 +31,14 @@ namespace Internal return this->target->FileModeFlags(); } - virtual void FlushFileBuffers() + virtual void FlushFileBuffers( FileFlushMode fileFlushMode ) { - this->target->FlushFileBuffers(); + this->target->FlushFileBuffers( fileFlushMode ); + } + + virtual Int64 CountIoNonFlushed() + { + return this->target->CountIoNonFlushed(); } virtual void SetNoFlushNeeded() @@ -134,11 +139,6 @@ 
namespace Internal return this->target->DiskId(); } - virtual Int64 CountIoNonFlushed() - { - return this->target->CountIoNonFlushed(); - } - virtual bool SeekPenalty() { return this->target->SeekPenalty(); diff --git a/dev/ese/src/noncore/blockcache/interop/IFile.h b/dev/ese/src/noncore/blockcache/interop/IFile.h index 8471fdb9..1e7b816c 100644 --- a/dev/ese/src/noncore/blockcache/interop/IFile.h +++ b/dev/ese/src/noncore/blockcache/interop/IFile.h @@ -69,7 +69,15 @@ namespace Internal /// /// Causes any previous writes to the file to be written to the media. /// - void FlushFileBuffers(); + /// File flush mode. + void FlushFileBuffers( FileFlushMode fileFlushMode ); + + /// + /// Returns the number of Write IOs that are unflushed or flush pending + /// since the last FlushFileBuffers call. + /// + /// The current unflushed write count for the file. + Int64 CountIoNonFlushed(); /// /// Indicates that the caller doesn't care if any previous writes to the file make it to the media. @@ -270,13 +278,6 @@ namespace Internal /// The disk ID of the file. IntPtr DiskId(); - /// - /// Get number of write IOs that are unflushed or flush pending - /// since last ErrFlushFileBuffers call. - /// - /// The current unflushed write count for the file. - Int64 CountIoNonFlushed(); - /// /// Get seek penalty (in order to identify SSD). /// diff --git a/dev/ese/src/noncore/blockcache/interop/IFileFilter.h b/dev/ese/src/noncore/blockcache/interop/IFileFilter.h index 6c989ae7..1cb0f58c 100644 --- a/dev/ese/src/noncore/blockcache/interop/IFileFilter.h +++ b/dev/ese/src/noncore/blockcache/interop/IFileFilter.h @@ -122,8 +122,9 @@ namespace Internal /// /// Flushes all data previously written for the current file. /// + /// File flush mode. /// IO operation mode. 
- void Flush( IOMode ioMode ); + void Flush( FileFlushMode fileFlushMode, IOMode ioMode ); }; } } diff --git a/dev/ese/src/noncore/blockcache/interop/Stdafx.h b/dev/ese/src/noncore/blockcache/interop/Stdafx.h index a488fce5..76e8f165 100644 --- a/dev/ese/src/noncore/blockcache/interop/Stdafx.h +++ b/dev/ese/src/noncore/blockcache/interop/Stdafx.h @@ -22,6 +22,7 @@ #include "FileIdentification.h" #include "FileModeFlags.h" +#include "FileFlushMode.h" #include "FileQOS.h" #include "FileSize.h" #include "IFile.h" diff --git a/dev/ese/src/os/_osfile.hxx b/dev/ese/src/os/_osfile.hxx index c80b6e43..e680eda9 100644 --- a/dev/ese/src/os/_osfile.hxx +++ b/dev/ese/src/os/_osfile.hxx @@ -168,7 +168,8 @@ class COSFile // osf IFileAPI::FileModeFlags Fmf() const override; - ERR ErrFlushFileBuffers( const IOFLUSHREASON iofr ) override; + ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const FileFlushMode ffm ) override; + LONG64 CioNonFlushed() const override; void SetNoFlushNeeded() override; ERR ErrPath( _Out_bytecap_c_(cbOSFSAPI_MAX_PATHW) WCHAR* const wszAbsPath ) override; @@ -283,8 +284,6 @@ class COSFile // osf ERR ErrDiskId( ULONG_PTR* const pulDiskId ) const override; - LONG64 CioNonFlushed() const override; - BOOL FSeekPenalty() const override { return m_posv->FSeekPenalty(); diff --git a/dev/ese/src/os/blockcache/_cachefactory.hxx b/dev/ese/src/os/blockcache/_cachefactory.hxx index d32508b3..9de1a9d5 100644 --- a/dev/ese/src/os/blockcache/_cachefactory.hxx +++ b/dev/ese/src/os/blockcache/_cachefactory.hxx @@ -112,7 +112,7 @@ INLINE ERR CCacheFactory::ErrCreate( _In_ IFileSystemFilter* const // flush the caching file - Call( (*ppffCaching)->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( (*ppffCaching)->ErrFlushFileBuffers( iofrBlockCache, ffmAll ) ); // get the block cache configuration diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index f2b7cbfe..5b5df9cc 100644 --- 
a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -96,7 +96,7 @@ class TFileFilter // ff public: // IFileAPI - ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr ) override; + ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) override; void SetNoFlushNeeded() override; ERR ErrSetSize( _In_ const TraceContext& tc, @@ -162,7 +162,9 @@ class TFileFilter // ff _In_opt_ const DWORD_PTR keyIOComplete, _In_opt_ const IFileAPI::PfnIOHandoff pfnIOHandoff ) override; ERR ErrIssue( _In_ const IFileFilter::IOMode iom ) override; - ERR ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFilter::IOMode iom ) override; + ERR ErrFlush( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, + _In_ const IFileFilter::IOMode iom ) override; private: @@ -2446,14 +2448,14 @@ ERR TFileFilter::ErrGetPhysicalId( _Out_ VolumeId* const pvolumeid, } template< class I > -ERR TFileFilter::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr ) +ERR TFileFilter::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) { ERR err = JET_errSuccess; const LONG64 ciosDelta = AtomicExchange( &m_cioUnflushed, 0 ); AtomicAdd( (QWORD*)&m_cioFlushing, ciosDelta ); - Call( ErrFlush( iofr, iomEngine ) ); + Call( ErrFlush( iofr, ffm, iomEngine ) ); HandleError: if ( err < JET_errSuccess ) @@ -2879,7 +2881,9 @@ HandleError: } template< class I > -ERR TFileFilter::ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFilter::IOMode iom ) +ERR TFileFilter::ErrFlush( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, + _In_ const IFileFilter::IOMode iom ) { ERR err = JET_errSuccess; BOOL fFlush = fFalse; @@ -2891,7 +2895,7 @@ ERR TFileFilter::ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFil iom == iomCacheWriteThrough || iom == iomCacheWriteBack ); - OSTrace( JET_tracetagBlockCache, OSFormat( "%s ErrFlushFileBuffers iom=%u", 
OSFormat( this ), iom ) ); + OSTrace( JET_tracetagBlockCache, OSFormat( "%s ErrFlush ffm=%u iom=%u", OSFormat( this ), ffm, iom ) ); switch ( iom ) { @@ -2924,7 +2928,7 @@ ERR TFileFilter::ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFil if ( fFlush ) { - Call( TFileWrapper::ErrFlushFileBuffers( iofr ) ); + Call( TFileWrapper::ErrFlushFileBuffers( iofr, ffm ) ); } HandleError: @@ -3296,7 +3300,7 @@ ERR TFileFilter::ErrAttach( _In_ const COffsets& offsetsFirstWrite ) NULL ) ); fPresumeAttached = fTrue; - Call( ErrFlushFileBuffers( iofrBlockCache ) ); + Call( ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); // mark the file as attached by retaining the cached file header. this will allow cache write through / write back // to the cached file to occur diff --git a/dev/ese/src/os/blockcache/_filefilterwrapper.hxx b/dev/ese/src/os/blockcache/_filefilterwrapper.hxx index 1470fb94..e3d4fef4 100644 --- a/dev/ese/src/os/blockcache/_filefilterwrapper.hxx +++ b/dev/ese/src/os/blockcache/_filefilterwrapper.hxx @@ -43,7 +43,7 @@ class TFileFilterWrapper // cff _In_opt_ const DWORD_PTR keyIOComplete, _In_opt_ const IFileAPI::PfnIOHandoff pfnIOHandoff ) override; ERR ErrIOIssue() override; - ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr ) override; + ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) override; public: // IFileFilter @@ -71,7 +71,9 @@ class TFileFilterWrapper // cff _In_opt_ const DWORD_PTR keyIOComplete, _In_opt_ const IFileAPI::PfnIOHandoff pfnIOHandoff ) override; ERR ErrIssue( _In_ const IFileFilter::IOMode iom ) override; - ERR ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFilter::IOMode iom ) override; + ERR ErrFlush( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, + _In_ const IFileFilter::IOMode iom ) override; private: @@ -196,9 +198,9 @@ ERR TFileFilterWrapper::ErrIOIssue() } template< class I > -ERR TFileFilterWrapper::ErrFlushFileBuffers( _In_ 
const IOFLUSHREASON iofr ) +ERR TFileFilterWrapper::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) { - return m_piInner->ErrFlushFileBuffers( iofr ); + return m_piInner->ErrFlushFileBuffers( iofr, ffm ); } template< class I > @@ -315,9 +317,11 @@ ERR TFileFilterWrapper::ErrIssue( _In_ const IFileFilter::IOMode iom ) } template< class I > -ERR TFileFilterWrapper::ErrFlush( _In_ const IOFLUSHREASON iofr, _In_ const IFileFilter::IOMode iom ) +ERR TFileFilterWrapper::ErrFlush( _In_ const IOFLUSHREASON iofr, + _In_ const IFileAPI::FileFlushMode ffm, + _In_ const IFileFilter::IOMode iom ) { - return m_piInner->ErrFlush( iofr, iom ); + return m_piInner->ErrFlush( iofr, ffm, iom ); } // CFileFilterWrapper: concrete TFileFilterWrapper. diff --git a/dev/ese/src/os/blockcache/_filewrapper.hxx b/dev/ese/src/os/blockcache/_filewrapper.hxx index f90671be..06a1f105 100644 --- a/dev/ese/src/os/blockcache/_filewrapper.hxx +++ b/dev/ese/src/os/blockcache/_filewrapper.hxx @@ -23,7 +23,8 @@ class TFileWrapper // fw IFileAPI::FileModeFlags Fmf() const; - ERR ErrFlushFileBuffers( const IOFLUSHREASON iofr ); + ERR ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ); + LONG64 CioNonFlushed() const; void SetNoFlushNeeded(); ERR ErrPath( _Out_bytecap_c_(cbOSFSAPI_MAX_PATHW) WCHAR* const wszAbsPath ); @@ -104,8 +105,6 @@ class TFileWrapper // fw ERR ErrDiskId( ULONG_PTR* const pulDiskId ) const; - LONG64 CioNonFlushed() const; - BOOL FSeekPenalty() const; #ifdef DEBUG @@ -457,9 +456,15 @@ IFileAPI::FileModeFlags TFileWrapper::Fmf() const } template< class I > -ERR TFileWrapper::ErrFlushFileBuffers( const IOFLUSHREASON iofr ) +ERR TFileWrapper::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const IFileAPI::FileFlushMode ffm ) { - return m_piInner->ErrFlushFileBuffers( iofr ); + return m_piInner->ErrFlushFileBuffers( iofr, ffm ); +} + +template< class I > +LONG64 TFileWrapper::CioNonFlushed() const 
+{ + return m_piInner->CioNonFlushed(); } template< class I > @@ -731,12 +736,6 @@ ERR TFileWrapper::ErrDiskId( ULONG_PTR* const pulDiskId ) const return m_piInner->ErrDiskId( pulDiskId ); } -template< class I > -LONG64 TFileWrapper::CioNonFlushed() const -{ - return m_piInner->CioNonFlushed(); -} - template< class I > BOOL TFileWrapper::FSeekPenalty() const { diff --git a/dev/ese/src/os/blockcache/_fsfilter.hxx b/dev/ese/src/os/blockcache/_fsfilter.hxx index 9907a7fe..459b01ce 100644 --- a/dev/ese/src/os/blockcache/_fsfilter.hxx +++ b/dev/ese/src/os/blockcache/_fsfilter.hxx @@ -2110,7 +2110,7 @@ ERR TFileSystemFilter::ErrDetachFile( _In_ CFileFilter* const NULL, NULL ) ); fPresumeDetached = fTrue; - Call( pff->ErrFlush( iofrBlockCache, iomRaw ) ); + Call( pff->ErrFlush( iofrBlockCache, ffmDataOnly, iomRaw ) ); // ask the cache to close the file // diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 84e4f715..3dffc3cf 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -2246,7 +2246,7 @@ class THashedLRUKCache if ( pwb == m_ilWriteBack.NextMost() || pwb->Pcfte() != m_ilWriteBack.Next( pwb )->Pcfte() ) { - const ERR errFlush = pwb->Pcfte()->Pff()->ErrFlush( iofrBlockCache, iomCacheWriteThrough ); + const ERR errFlush = pwb->Pcfte()->Pff()->ErrFlush( iofrBlockCache, ffmDataOnly, iomCacheWriteThrough ); for ( CWriteBack* pwbT = pwb; pwbT && pwbT->Pcfte() == pwb->Pcfte(); pwbT = m_ilWriteBack.Prev( pwbT ) ) @@ -3090,7 +3090,7 @@ class THashedLRUKCache // ensure that all state referred to by the journal entries we are about to truncate is durable - Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); // truncate the journal @@ -3257,7 +3257,7 @@ class THashedLRUKCache // advance the durable pointer (if not already done) Call( m_pjInner->ErrFlush() ); 
- Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); } if ( m_jposLastEnd != jposInvalid || cbAvail >= cbJournalFullAndDurable ) @@ -3268,7 +3268,7 @@ class THashedLRUKCache Call( err == JET_errDiskFull ? JET_errSuccess : err ); Call( m_pjInner->ErrFlush() ); - Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); } // ask the cache to flush all its state up to the write back pointer @@ -3284,7 +3284,7 @@ class THashedLRUKCache Call( m_pjInner->ErrAppendEntry( _countof( rgjbEmpty ), rgjbEmpty, &jposEmpty, &jposEmptyEnd ) ); Call( m_pjInner->ErrFlush() ); - Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); HandleError: if ( err < JET_errSuccess ) @@ -3329,7 +3329,7 @@ class THashedLRUKCache // flush the caching file - Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( m_pc->PffCaching()->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); HandleError: delete pfje; @@ -5301,7 +5301,7 @@ ERR THashedLRUKCache::ErrCreate() // flush the caching file - Call( PffCaching()->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( PffCaching()->ErrFlushFileBuffers( iofrBlockCache, ffmAll ) ); HandleError: delete pch; @@ -5667,7 +5667,7 @@ ERR THashedLRUKCache::ErrDestage( _In_ const VolumeId // flush the cached file and mark the write backs as durable - Call( pcfte->Pff()->ErrFlush( iofrBlockCache, iomCacheWriteThrough ) ); + Call( pcfte->Pff()->ErrFlush( iofrBlockCache, ffmDataOnly, iomCacheWriteThrough ) ); for ( CWriteBack* pwb = ilWriteBack.PrevMost(); pwb; pwb = ilWriteBack.Next( pwb ) ) { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachewritecountsmanager.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachewritecountsmanager.hxx index eada6427..c462b35b 100644 
--- a/dev/ese/src/os/blockcache/_hashedlrukcachewritecountsmanager.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachewritecountsmanager.hxx @@ -276,7 +276,7 @@ INLINE ERR TCachedBlockWriteCountsManager::ErrSave() // flush the write - Call( m_pff->ErrFlushFileBuffers( iofrBlockCache ) ); + Call( m_pff->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ) ); // move to the new write set diff --git a/dev/ese/src/os/blockcache/_journalsegmentmanager.hxx b/dev/ese/src/os/blockcache/_journalsegmentmanager.hxx index 14608e7b..ea63903a 100644 --- a/dev/ese/src/os/blockcache/_journalsegmentmanager.hxx +++ b/dev/ese/src/os/blockcache/_journalsegmentmanager.hxx @@ -516,7 +516,7 @@ HandleError: template< class I > INLINE ERR TJournalSegmentManager::ErrFlush() { - return m_pff->ErrFlushFileBuffers( iofrBlockCache ); + return m_pff->ErrFlushFileBuffers( iofrBlockCache, ffmDataOnly ); } template< class I > diff --git a/dev/ese/src/os/osfile.cxx b/dev/ese/src/os/osfile.cxx index 9462be61..2152daa4 100644 --- a/dev/ese/src/os/osfile.cxx +++ b/dev/ese/src/os/osfile.cxx @@ -548,7 +548,7 @@ COSFile::~COSFile() #ifdef OS_LAYER_VIOLATIONS AssertSz( fFalse, "All ESE-level files should be completely flushed by file close." 
); #endif - (void)ErrFlushFileBuffers( (IOFLUSHREASON) 0x00800000 /* iofrDefensiveCloseFlush not available */ ); + (void)ErrFlushFileBuffers( (IOFLUSHREASON) 0x00800000 /* iofrDefensiveCloseFlush not available */, ffmAll ); } // tear down our volume @@ -714,7 +714,46 @@ ERR COSFile::ErrIsReadOnly( BOOL* const pfReadOnly ) extern HaDbFailureTag OSDiskIIOHaTagOfErr( const ERR err, const BOOL fWrite ); #endif -ERR COSFile::ErrFlushFileBuffers( const IOFLUSHREASON iofr ) +typedef __success( return >= 0 ) LONG NTSTATUS; + +typedef struct _IO_STATUS_BLOCK { + union { + NTSTATUS Status; + PVOID Pointer; + } DUMMYUNIONNAME; + + ULONG_PTR Information; +} IO_STATUS_BLOCK, * PIO_STATUS_BLOCK; + +#define NT_SUCCESS(Status) (((NTSTATUS)(Status)) >= 0) + +__kernel_entry NTSYSCALLAPI +NTSTATUS +NTAPI +NtFlushBuffersFileEx( + _In_ HANDLE FileHandle, + _In_ ULONG Flags, + _In_reads_bytes_( ParametersSize ) PVOID Parameters, + _In_ ULONG ParametersSize, + _Out_ PIO_STATUS_BLOCK IoStatusBlock +); + +#define FLUSH_FLAGS_FILE_DATA_ONLY 0x00000001 // Win8 +#define FLUSH_FLAGS_NO_SYNC 0x00000002 // Win8 +#define FLUSH_FLAGS_FILE_DATA_SYNC_ONLY 0x00000004 // Win10 RS1 + +static NTOSFuncNtStd( g_pfnNtFlushBuffersFileEx, g_mwszzNtdllLibs, NtFlushBuffersFileEx, oslfExpectedOnWin8 ); + +NTSYSAPI +ULONG +NTAPI +RtlNtStatusToDosError( + _In_ NTSTATUS Status +); + +static NTOSFuncNtStd( g_pfnRtlNtStatusToDosError, g_mwszzNtdllLibs, RtlNtStatusToDosError, oslfExpectedOnWin5x ); + +ERR COSFile::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const FileFlushMode ffm ) { ERR err = JET_errSuccess; @@ -775,13 +814,18 @@ ERR COSFile::ErrFlushFileBuffers( const IOFLUSHREASON iofr ) SetLastError( ERROR_BROKEN_PIPE /* hopefully odd enough to cause people to look at code */ ); } - DWORD error = ERROR_SUCCESS; + NTSTATUS status = 0; + ULONG flags = ffm == ffmDataOnly ? 
FLUSH_FLAGS_FILE_DATA_SYNC_ONLY : 0; + IO_STATUS_BLOCK iosb = { }; + DWORD error = ERROR_SUCCESS; // CONSIDER: Should we have some sort of locking at COSFile or COSDisk on running // concurrent FFB calls? I can't find any documentation that it is not supported, // so it is only a potentially inefficiency (pointless) - if ( !fFaultedFlushSucceeded || !FlushFileBuffers( m_hFile ) ) + if ( !fFaultedFlushSucceeded || + !NT_SUCCESS( status = g_pfnNtFlushBuffersFileEx( m_hFile, flags, NULL, 0, &iosb ) ) ) { + SetLastError( g_pfnRtlNtStatusToDosError( status ) ); error = GetLastError(); err = ErrOSFileIFromWinError( error ); Assert( ERROR_IO_PENDING != error ); // not bad, just unexpected From 08ff7002dab5f525f2d3901277909eb6404eb347 Mon Sep 17 00:00:00 2001 From: Build Team Date: Wed, 12 Oct 2022 21:23:22 +0000 Subject: [PATCH 059/102] This is a fast revert. The justification for the revert is: This set of changes is causing massive test failures, and already 77 M soft asserts in inner ring flooding optics. The reverted PR age: 14.62 hours old. [Substrate:f2621eb76e5e186baae820e8ec1ec642909dd059] --- dev/ese/src/ese/_osu/hapublishu.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index 05c4cf1c..57eea8ef 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -26,7 +26,7 @@ void OSUHAPublishEvent_( // update: why? if ( pinst == NULL || pinstNil == pinst ) { - //FireWall( "SkipFi2NoInst" ); + FireWall( "SkipFi2NoInst" ); fEmit = fFalse; } From e8ff72dfef3869f4c5d4b4de774186113f211c5c Mon Sep 17 00:00:00 2001 From: Build Team Date: Wed, 12 Oct 2022 21:27:49 +0000 Subject: [PATCH 060/102] This is a fast revert. The justification for the revert is: This set of changes is causing massive test failures, and already 77 M soft asserts in SDFv2 flooding optics. The reverted PR age: 34.26 hours old. 
[Substrate:186d8b4250635759de174b3c647a4bf8079a43a7] --- dev/ese/published/inc/os/error.hxx | 11 +- dev/ese/published/inc/os/hapublish.hxx | 4 - dev/ese/src/_res/jetmsg.mc | 11 -- dev/ese/src/ese/_log/logredo.cxx | 10 +- dev/ese/src/ese/_osu/hapublishu.cxx | 56 +------- dev/ese/src/ese/cpage.cxx | 9 ++ dev/ese/src/ese/jetapi.cxx | 173 +------------------------ dev/ese/src/ese/jettest.cxx | 10 +- dev/ese/src/ese/sysinit.cxx | 10 -- dev/ese/src/inc/daedef.hxx | 7 +- dev/ese/src/inc/jettest.hxx | 3 - dev/ese/src/inc/log.hxx | 4 - dev/ese/src/os/edbg.cxx | 99 +++----------- 13 files changed, 48 insertions(+), 359 deletions(-) diff --git a/dev/ese/published/inc/os/error.hxx b/dev/ese/published/inc/os/error.hxx index 0437a6cf..08327908 100644 --- a/dev/ese/published/inc/os/error.hxx +++ b/dev/ese/published/inc/os/error.hxx @@ -523,6 +523,12 @@ public: __forceinline CErrFrameSimple * PefLastThrow(); +__forceinline ERR ErrERRSetLastThrow( _In_ const CHAR* szFile, _In_ const LONG lLine, _In_ const ERR err ) +{ + PefLastThrow()->Set( szFile, lLine, err ); + return err; +} + // Returns the line of the last call that failed out w/ an error, presumably within this frame. 
ULONG UlLineLastCall(); @@ -563,10 +569,7 @@ ERR ErrERRCheck_( const ERR err, const CHAR* szFile, const LONG lLine ); __forceinline ERR ErrERRCheck_( _In_ const ERR err, _In_ const CHAR* szFile, _In_ const LONG lLine ) { extern ERR g_errTrap; - if ( err < 0 /* JET_errSuccess */ ) - { - PefLastThrow()->Set( szFile, lLine, err ); - } + PefLastThrow()->Set( szFile, lLine, err ); if ( g_errTrap == err ) { KernelDebugBreakPoint(); diff --git a/dev/ese/published/inc/os/hapublish.hxx b/dev/ese/published/inc/os/hapublish.hxx index b0a24727..93929b87 100644 --- a/dev/ese/published/inc/os/hapublish.hxx +++ b/dev/ese/published/inc/os/hapublish.hxx @@ -8,10 +8,6 @@ #include #include "exdbmsg_ese.h" -const DWORD bitHaPublishedEvent = 0x1; -const DWORD bitHaPublishedCorruptionTag = 0x2; -const DWORD bitHaPublishedIoHardTag = 0x4; - #define Ese2HaId( id ) ( HADBFAILURE_EVENT_RANGE_START_ESE + ( id ) ) #define OSUHAPublishEvent( p0, p1, p2, p3, p4, p5, p6, p7, p8, p9 ) \ diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index 69a4ed34..fc64cb61 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -215,17 +215,6 @@ Language=English %1 (%2) %3The specific ESE configuration store is locked in a read inhibit state, clear the %1 registry value to enable ESE to continue and utilize the config store. . -MessageId=109 -SymbolicName=START_INSTANCE_FAILED_ID -Language=English -%1 (%2) %3The database engine failed to start instance (%4) due to error %5. (Time=%6 seconds) -%n -Failure Details:%n -Mode: %7%n -Fail Address: %8%n -Publishing: %9%n -. - ;// You are almost assuredly not adding in the right place? 
diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index dab4a20a..0f8f0a51 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -3570,10 +3570,6 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) Assert( (ULONG)data.Cb() == cbNewData ); if ( (ULONG)data.Cb() != cbNewData ) { - // per analysis of a real world case, it is hard to imagine how local (passive) data or remote (active) - // database data generated this incorrectness. This is literally saying the ib/cb pairs do NOT add up - // to the final record size (from the active). This almost assuredly means that there was a corruption - // of the actual log record data. Or a bug in our diff creation or reconstruction alg. OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"a3cb57b9-8ba1-496d-a6fc-4fc2f0140fc4" ); Error( ErrERRCheck( JET_errLogCorrupted ) ); } @@ -5287,7 +5283,7 @@ ERR LOG::ErrLGRICheckRedoAttachDb( goto HandleError; } } - else if ( FErrIsDbCorruption( err ) ) + else if ( JET_errReadVerifyFailure == err ) { reason = eDARHeaderCorrupt; if ( pfmp->FIgnoreDeferredAttach() ) @@ -5297,8 +5293,8 @@ ERR LOG::ErrLGRICheckRedoAttachDb( } else { - // the DB file header is corrupt - OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"9106f5c1-2f93-479b-a12a-c93c6ab3de68" ); + // the log file header is corrupt + OSUHAEmitFailureTag( m_pinst, HaDbFailureTagRecoveryRedoLogCorruption, L"9106f5c1-2f93-479b-a12a-c93c6ab3de68" ); goto HandleError; } } diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index 57eea8ef..e0d0cdad 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -20,23 +20,8 @@ void OSUHAPublishEvent_( DWORD cParameter, const WCHAR** rgwszParameter ) { - BOOL fEmit = fTrue; - // failure events need not be published if there is no instance - // update: why? 
- if ( pinst == NULL || pinstNil == pinst ) - { - FireWall( "SkipFi2NoInst" ); - fEmit = fFalse; - } - - if ( !UlParam( pinst, JET_paramEnableHaPublish ) ) - { - // might be nice to Assert/FireWall not O365 Datacenter / Store.worker, but a bit of a layer violation - fEmit = fFalse; - } - - if ( fEmit ) + if ( pinstNil != pinst && UlParam( pinst, JET_paramEnableHaPublish ) ) { OSUHAPublishEventImpl( haTag, pinst->m_wszInstanceName, @@ -146,27 +131,10 @@ void OSUHAEmitFailureTag_( } } - // FUTURE: HA Publish is only for O365 datacenter, but even so this is a bit of a layering violation. We will - // add these temporarily to do a basic health check on O365 to see if we're dropping HA FailureItems from any ESE - // code paths. - const BOOL fO365StoreWorker = ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Worker" ) == 0 ); - const BOOL fO365DatacenterProcess = - fO365StoreWorker || - ( _wcsicmp( WszUtilProcessName(), L"MSExchangeRepl" ) == 0 ) || - ( _wcsicmp( WszUtilProcessName(), L"EdgeTransport" ) == 0 ) || - ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.DxStore.HA.Instance" ) == 0 ) || - ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.SharedCache" ) == 0 ) || - ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Service" ) == 0 ); // calls JET APIs, but should not actually start ese inst - // should we add eseutil? 
- // if the instance pointer is NULL then do not emit an event // if ( !pinstActual ) { - if ( !FInEmbeddedUnitTest() ) - { - FireWall( "SkipFiNoInstActualX" ); - } fEmit = fFalse; } @@ -174,10 +142,6 @@ void OSUHAEmitFailureTag_( // if ( pinstActual && !UlParam( pinstActual, JET_paramEnableHaPublish ) ) { - if ( fO365StoreWorker ) - { - FireWall( "SkipFiHaPublishOff" ); - } fEmit = fFalse; } @@ -187,11 +151,6 @@ void OSUHAEmitFailureTag_( ( !pinstActual->m_wszInstanceName || !pinstActual->m_wszInstanceName[ 0 ] || !pinstActual->m_wszDisplayName || !pinstActual->m_wszDisplayName[ 0 ] ) ) { - // many test processes have this off, but all real ESE instances should be correctly identified. - if ( fO365DatacenterProcess ) - { - FireWall( "SkipFiNoInstOrDispName" ); - } fEmit = fFalse; } @@ -199,7 +158,6 @@ void OSUHAEmitFailureTag_( // if ( haTag == HaDbFailureTagNoOp ) { - FireWall( "SkipFiTagNoOp" ); fEmit = fFalse; } @@ -207,7 +165,6 @@ void OSUHAEmitFailureTag_( // if ( !wszGuid || !wszGuid[ 0 ] ) { - FireWall( "SkipFiNoGuid" ); fEmit = fFalse; } @@ -219,7 +176,6 @@ void OSUHAEmitFailureTag_( HA_NOOP_FAILURE_TAG_ID + msgidOffset <= HA_NOOP_FAILURE_TAG_ID || HA_NOOP_FAILURE_TAG_ID + msgidOffset > HA_MAX_FAILURE_TAG_ID ) { - FireWall( "SkipFiEvtOutOfRange" ); fEmit = fFalse; } @@ -258,16 +214,6 @@ void OSUHAEmitFailureTag_( HA_NOOP_FAILURE_TAG_ID + msgidOffset, iwsz, rgwsz ); - - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedEvent ); - if ( haTag == HaDbFailureTagCorruption ) - { - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedCorruptionTag ); - } - if ( haTag == HaDbFailureTagIoHard ) - { - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedIoHardTag ); - } } // cleanup diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index 7e1476d3..d2e39137 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -4912,14 +4912,20 @@ ERR 
CPAGE::ErrCheckPage( // The whole line is starting above the data start, i.e. off the data section, and possibly even off page. MakeCorruptionDetailsSz( L"TAG %d computed offset starts too high (ib=%d, cb=%d, %p > %p)", itag, ib, cb, line.pv, pbPageDataEnd ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); + PageAssertTrack( *this, fFalse, "LineEntirelyAboveDataSection" ); +#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, L"LineEntirelyAboveDataSection", wszCorruptionDetails, fLogEvent ) ); +#endif } if ( pbLineLastByte > pbPageDataEnd ) { // The line ends above the data start, i.e. off the data section, but does start / overlaping in valid data section. MakeCorruptionDetailsSz( L"TAG %d computed offset starts too high (ib=%d, cb=%d, %p > %p)", itag, ib, cb, pbLineLastByte, pbPageDataEnd ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); + PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "LineEndsAboveDataSection" ); +#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, L"LineEndsAboveDataSection", wszCorruptionDetails, fLogEvent ) ); +#endif } if ( errGetLine < JET_errSuccess || !FOnData( line.pv, line.cb ) ) @@ -4931,7 +4937,10 @@ ERR CPAGE::ErrCheckPage( MakeCorruptionDetailsSz( L"UNCAUGHT: TAG %d ErrGetPtr() failed or got line off page (ib=%d, cb=%d, err=%d,f=%d).", itag, ib, cb, errGetLine, FOnData( line.pv, line.cb ) ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); // there should not be too many errors coming from ErrGetLine() that we can't embed the err in the corruption type. 
+ PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "GetLineFailed:%d\n", errGetLine ); +#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, wszGetLineErr, wszCorruptionDetails, fLogEvent ) ); +#endif } // do some simple KEYDATAFLAGS checks diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index e49369ff..425c01a2 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -1549,92 +1549,6 @@ VOID PERFSetDatabaseNames( IFileSystemAPI* const pfsapi ) } -#ifdef ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS - -// -// Trace to an IRS.RAW the init failure. -// - -void DumpFailedInitToIrsRaw( - _In_ INST * pinst, - _In_ PCWSTR wszInstDisplayName, - _In_ PCWSTR wszErrorState, - _In_ PCWSTR wszSeconds, - _In_ PCWSTR wszFailingMode, - _In_ PCWSTR wszFailingAddress, - _In_ PCWSTR wszHaPublishingFacts ) -{ - __int64 fileTime; - WCHAR wszDate[32]; - WCHAR wszTime[32]; - size_t cchRequired; - WCHAR wszInstIrsFile[ 5 /* Inst- */ + 3 /* inst log base name */ + 1 ]; - WCHAR wszInstIrsPathBase[ OSFSAPI_MAX_PATH ]; - CPRINTF * pcprintfPageTrace = NULL; - - if ( pinst == NULL || pinst->m_pfsapi == NULL ) - { - FireWall( "InstIrsUnexpectedInitExitBeforeInstOrPfsapiAlloc" ); - return; - } - - if ( ( SzParam( pinst, JET_paramLogFilePath ) == NULL ) || - ( SzParam( pinst, JET_paramLogFilePath )[0] == L'\0' ) ) - { - FireWall( "InstIrsLogPathNotSet" ); - return; - } - - if ( ( SzParam( pinst, JET_paramBaseName ) == NULL ) || - ( SzParam( pinst, JET_paramBaseName )[0] == L'\0' ) ) - { - FireWall( "InstIrsBaseNameNotSet" ); - return; - } - - // make path - // - OSStrCbFormatW( wszInstIrsFile, sizeof( wszInstIrsFile ), L"Inst-%ws", SzParam( pinst, JET_paramBaseName ) ); - ERR errT = pinst->m_pfsapi->ErrPathBuild( - SzParam( pinst, JET_paramLogFilePath ), - wszInstIrsFile, - L"", // ext filled by IRS func / ErrBeginDatabaseIncReseedTracing() - wszInstIrsPathBase, - sizeof( wszInstIrsPathBase ) ); - if ( errT < JET_errSuccess ) - { - 
FireWall( "InstIrsPathBuildFail" ); - return; - } - - // start tracing (before anything else) - // - errT = ErrBeginDatabaseIncReseedTracing( pinst->m_pfsapi, wszInstIrsPathBase, &pcprintfPageTrace ); - if ( errT < JET_errSuccess ) - { - FireWall( "InstIrsFailedIrsOpen" ); - return; - } - - fileTime = UtilGetCurrentFileTime(); - ErrUtilFormatFileTimeAsTimeWithSeconds( fileTime, wszTime, _countof(wszTime), &cchRequired); - ErrUtilFormatFileTimeAsDate( fileTime, wszDate, _countof(wszDate), &cchRequired); - (*pcprintfPageTrace)( "Begin " __FUNCTION__ "() @ Time %ws %ws\r\n", wszTime, wszDate ); - - // Consider adding ERRFormatIssueSource() to get last error information and Server Version. - (*pcprintfPageTrace)( "JetInit (%ws) Failed with %ws in %ws seconds.\r\n", wszInstDisplayName, wszErrorState, wszSeconds ); - (*pcprintfPageTrace)( "Failing Mode: %ws\r\n", wszFailingMode ); - (*pcprintfPageTrace)( "Failing Address: %ws\r\n", wszFailingAddress ); - (*pcprintfPageTrace)( "HA Pub Facts: %ws\r\n", wszHaPublishingFacts ); - - EndDatabaseIncReseedTracing( &pcprintfPageTrace ); - - return; -} - -#endif // ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS - - // // CIsamSequenceDiagLog // @@ -1864,9 +1778,6 @@ __int64 CIsamSequenceDiagLog::UsecTimer( _In_ INT seqBegin, _In_ const INT seqEn return 0; } - Expected( FTriggeredSequence_( 0 ) ); // be odd to have not started sequence and ask for timings - Expected( seqEnd + 1 != m_cseqMax || FTriggeredSequence_( seqEnd ) ); - if ( !FValidSequence_( seqBegin ) || !FValidSequence_( seqEnd ) || seqBegin >= seqEnd || @@ -1882,10 +1793,9 @@ __int64 CIsamSequenceDiagLog::UsecTimer( _In_ INT seqBegin, _In_ const INT seqEn { seqBegin--; } - Expected( seqBegin < seqEnd ); // this should be true unless we had a failure before the 2nd sequence (seq = 1). Let us see if it happens. 
- + if ( !FTriggeredSequence_( seqBegin ) || - !FTriggeredSequence_( seqEnd ) ) + !FTriggeredSequence_( seqEnd ) ) { return 0; } @@ -3228,7 +3138,7 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration // initialize this setting if ( m_permillageSmoothIo == dwMax ) { - // Exs: 999� = 99.9% Smooth, 990� = 99.0% Smooth, 900� = 90.0% Smooth. Debug default = 0.2% + // Exs: 999‰ = 99.9% Smooth, 990‰ = 99.0% Smooth, 900‰ = 90.0% Smooth. Debug default = 0.2% ULONG permillageSmoothIo = OnDebugOrRetail( 2, CDefaultFileSystemConfiguration::PermillageSmoothIo() ); if ( m_pinst ) @@ -21570,10 +21480,10 @@ LOCAL JET_ERR JetInitEx( const ULONG cbTimingResourceDataSequence = pinst->m_isdlInit.CbSprintTimings(); WCHAR * wszTimingResourceDataSequence = (WCHAR *)_alloca( cbTimingResourceDataSequence ); pinst->m_isdlInit.SprintTimings( wszTimingResourceDataSequence, cbTimingResourceDataSequence ); - const double secsInit = (double)pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000.0; // convert to seconds - WCHAR wszSeconds[30]; + const __int64 secsInit = pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000; // convert to seconds + WCHAR wszSeconds[16]; WCHAR wszInstId[16]; - OSStrCbFormatW( wszSeconds, sizeof(wszSeconds), L"%.3f", secsInit ); + OSStrCbFormatW( wszSeconds, sizeof(wszSeconds), L"%I64d", secsInit ); OSStrCbFormatW( wszInstId, sizeof(wszInstId), L"%d", IpinstFromPinst( pinst ) ); const WCHAR * rgszT[4] = { wszInstId, wszSeconds, wszTimingResourceDataSequence, wszAdditionalFixedData }; @@ -21610,77 +21520,6 @@ LOCAL JET_ERR JetInitEx( { const WCHAR* wszInstDisplayName = ( pinst != NULL && pinst->m_wszDisplayName != NULL ? 
pinst->m_wszDisplayName : L"_unknown_" ); OSDiagTrackInit( wszInstDisplayName, pinst->m_plog->QwSignLogHash(), err ); - - // avoiding quick and dirty non-localized insert text on windows -#ifdef ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS - - pinst->m_isdlInit.Trigger( eInitDone ); - const double secsInit2 = (double)pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000.0; // convert to seconds - WCHAR wszSeconds2[30]; - OSStrCbFormatW( wszSeconds2, sizeof(wszSeconds2), L"%.3f", secsInit2 ); - - WCHAR wszErrorState[120]; - JET_ERRCAT errcatMostSpecific = JET_errcatUnknown; - (void)ErrERRLookupErrorCategory( err, &errcatMostSpecific ); - if ( PefLastThrow() && err == PefLastThrow()->Err() ) - { - PERSISTED // for optics "(JET_errcat: 10)", etc. see Exch \ EseEventCategorized.cs. - OSStrCbFormatW( wszErrorState, sizeof(wszErrorState), L"%d (JET_errcat: %d) (src: %hs:%d)", err, errcatMostSpecific, SzSourceFileName( PefLastThrow()->SzFile() ), PefLastThrow()->UlLine() ); - } - else - { - PERSISTED // for optics "(JET_errcat: 10)", etc. see Exch \ EseEventCategorized.cs. - OSStrCbFormatW( wszErrorState, sizeof(wszErrorState), L"%d (JET_errcat: %d)", err, errcatMostSpecific ); - } - - WCHAR wszFailingMode[2] = { WchReportInstState( pinst ), L'\0' }; - - WCHAR wszFailingAddress[60]; - // The normal way of detecting recovery \ redo via: - // plog->FRecovering() && plog->FRecoveringMode() == fRecoveringRedo - // is controlled and cleaned up by this point even on an error. However, fortunately - // the pinst->m_perfstatusEvent mode is one way during init, and not reset until next - // call to JetInit() so we use this method for determining what mode we reached. 
- const BOOL fRedo = pinst->m_perfstatusEvent == perfStatusRecoveryRedo; - const BOOL fUndo = pinst->m_perfstatusEvent == perfStatusRecoveryUndo; - const BOOL fDo = pinst->m_perfstatusEvent == perfStatusRuntime; - // Normal method of getting lpgosRedo (plog->LgposLGLogTipNoLock()) won't work for - // the same reason the regular mode computation, computes it wrong above. But the - // actual lgpos we want is in m_lgposRedo, so use special function to fetch it. - LGPOS lgposFailed = !fUndo ? // just in case, we treat everything besides undo as redo. - pinst->m_plog->LgposDiagnosticRedoFailedAddress() : - pinst->m_plog->LgposLGLogTipNoLock(); // undo address comes from live lgpos tip. - // Can imagine actually sticking other pieces of address in here, like the pgno the LR was - // referencing, or even logical descriptions like "DbfilehdrReadErr" or something. - OSStrCbFormatW( wszFailingAddress, sizeof( wszFailingAddress ), - L"lgpos%hs:%08x:%04x:%04x", - fRedo ? "Redo" : - ( fUndo ? "Undo" : - ( fDo ? "RedoOld" : - "Redo-Unconfirmed" ) ), - lgposFailed.lGeneration, lgposFailed.isec, lgposFailed.ib ); - - WCHAR wszHaPublishingFacts[300]; - PERSISTED // for optics "Verbose: 1", "FI Tags Published: 0x", and "FiCorruptionTag ". see Exch \ EseEventCategorized.cs, Exch \ EseDatabaseMonitoringContext.cs - (void)ErrOSStrCbFormatW( wszHaPublishingFacts, sizeof( wszHaPublishingFacts ), L"Verbose: %d, FI Tags Published: 0x%x ( %hs%hs)", - !!pinst->m_isdlInit.FTriggeredStep( eInitLogRecoverySilentRedoDone ), - pinst->m_grbitHaFailureTags, -#if defined( USE_HAPUBLISH_API ) - ( pinst->m_grbitHaFailureTags & bitHaPublishedCorruptionTag ) ? "FiCorruptionTag " : "", - ( pinst->m_grbitHaFailureTags & bitHaPublishedIoHardTag ) ? "FiIoHardTag " : "" ); - ( pinst->m_grbitHaFailureTags & bitHaPublishedIoHardTag ) ? 
"FiIoHardTag " : "" -#else - "", "" -#endif - ); - - const WCHAR * rgszFailT[6] = { wszInstDisplayName, wszErrorState, wszSeconds2, wszFailingMode, wszFailingAddress, wszHaPublishingFacts }; - - UtilReportEvent( eventError, GENERAL_CATEGORY, START_INSTANCE_FAILED_ID, _countof( rgszFailT ), rgszFailT, 0, NULL, pinst ); - - // Also to avoid event wrap, report failures in JetInit() to .IRS.RAW - DumpFailedInitToIrsRaw( pinst, wszInstDisplayName, wszErrorState, wszSeconds2, wszFailingMode, wszFailingAddress, wszHaPublishingFacts ); -#endif } // if instance allocated in this function call diff --git a/dev/ese/src/ese/jettest.cxx b/dev/ese/src/ese/jettest.cxx index 4868a537..90948cfc 100644 --- a/dev/ese/src/ese/jettest.cxx +++ b/dev/ese/src/ese/jettest.cxx @@ -5,9 +5,7 @@ #include "PageSizeClean.hxx" -#ifndef ENABLE_JET_UNIT_TEST -#error "File jettest.cxx is only supposed to be referenced / compiled in the unit test .vcxproj file." -#endif +#ifdef ENABLE_JET_UNIT_TEST #include @@ -539,7 +537,5 @@ void JetTestEnforceSEHException::Cleanup() s_pThreadExcep = NULL; } -BOOL FInEmbeddedUnitTest() -{ - return fTrue; -} +#endif // ENABLE_JET_UNIT_TEST + diff --git a/dev/ese/src/ese/sysinit.cxx b/dev/ese/src/ese/sysinit.cxx index 94272cd9..d14c0424 100644 --- a/dev/ese/src/ese/sysinit.cxx +++ b/dev/ese/src/ese/sysinit.cxx @@ -11,16 +11,6 @@ BOOL g_fDBGPerfOutput = fFalse; #endif /* DEBUG || PERFDUMP */ -// This is here, because jettest.cxx is only compiled in eselibwithtest.dll - -#ifndef ENABLE_JET_UNIT_TEST - -BOOL FInEmbeddedUnitTest() -{ - return fFalse; -} - -#endif // ENABLE_JET_UNIT_TEST #ifdef DEBUG diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 6cf79464..8c44620d 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -3465,7 +3465,7 @@ INLINE ERR DBFILEHDR::DumpLite( CPRINTF* pcprintf, const char * const szNewLine, (*pcprintf)( "Revert Page Count: %u%s", (ULONG) le_ulRevertPageCount, szNewLine ); lgpos = 
le_lgposCommitBeforeRevert; - (*pcprintf)( "Last Commit Before Revert: (0x%X,%X,%X) %s", lgpos.lGeneration, lgpos.isec, lgpos.ib, szNewLine ); + (*pcprintf)( "Last Commit Before Revert: (0x%X,%X,%X) ", lgpos.lGeneration, lgpos.isec, lgpos.ib ); return JET_errSuccess; } @@ -5032,9 +5032,6 @@ public: BOOL m_fTermInProgress; BOOL m_fTermAbruptly; INST_STINIT m_fSTInit; - // Note: This status is not cleaned up if we fail in middle of Redo, Undo and this fact - // is used at end of JetInitEx() to log what mode we failed in. So do not reset this on - // error paths INT m_perfstatusEvent; // Redo, Undo, Runtime/Do-time, and Term. BOOL m_fBackupAllowed; @@ -5236,8 +5233,6 @@ public: CIsamSequenceDiagLog m_isdlInit; CIsamSequenceDiagLog m_isdlTerm; - volatile DWORD m_grbitHaFailureTags; - private: ERR ErrAPIAbandonEnter_( const LONG lOld ); diff --git a/dev/ese/src/inc/jettest.hxx b/dev/ese/src/inc/jettest.hxx index 92c623dc..eebb9b53 100644 --- a/dev/ese/src/inc/jettest.hxx +++ b/dev/ese/src/inc/jettest.hxx @@ -356,8 +356,5 @@ void Test##component##test::Run_() #endif // !ENABLE_JET_UNIT_TEST -// defined in both ese.dll and eselibwithtest.dll, but returning different answer -BOOL FInEmbeddedUnitTest(); - #endif // JETTEST_HXX_INCLUDED diff --git a/dev/ese/src/inc/log.hxx b/dev/ese/src/inc/log.hxx index 8eaf7b02..27c685a0 100644 --- a/dev/ese/src/inc/log.hxx +++ b/dev/ese/src/inc/log.hxx @@ -1285,10 +1285,6 @@ public: BOOL FLastLRIsShutdown() const { return m_fLastLRIsShutdown; } LGPOS LgposShutDownMark() const { return m_lgposRedoShutDownMarkGlobal; } - // Note: Generally people should not be interested in lgposRedo, but failure event code has a need to - // know it directly. 
- LGPOS LgposDiagnosticRedoFailedAddress() const { return m_lgposRedo; } - VOID LGRRemoveFucb( FUCB * pfucb ); ERR ErrLGMostSignificantRecoveryWarning( void ); diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index 73058a7a..c81f26bc 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -665,7 +665,7 @@ const INT cfuncmap = sizeof( rgfuncmap ) / sizeof( EDBGFUNCMAP ); #define DUMPA( _struct ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" } -#define DUMPAA( _struct, addlargs ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" addlargs } +#define DUMPAA( _struct, addlargs ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" addlargs } // ================================================================ @@ -691,20 +691,20 @@ LOCAL const CDUMPMAP rgcdumpmap[] = { DUMPA( LOG_STREAM ), DUMPA( LOG_WRITE_BUFFER ), DUMPA( VER ), - DUMPAA( MEMPOOL, " [|*] - =specified tag only, *=all tags" ), + DUMPAA( MEMPOOL, "[|*] - =specified tag only, *=all tags" ), DUMPA( SPLIT ), DUMPA( SPLITPATH ), DUMPA( MERGE ), DUMPA( MERGEPATH ), - DUMPAA( DBFILEHDR, "|.|.disk" ), + DUMPA( DBFILEHDR ), { "CDynamicHashTable", &(CDUMPA::instance), "CDynamicHashTable
" }, { "CApproximateIndex", &(CDUMPA::instance), "CApproximateIndex
" }, { "g_bflruk", &(CDUMPA::instance), "g_bflruk ese!g_bflruk" }, - DUMPAA( COSDisk, "|.db|.edb" ), - DUMPAA( COSFile, "|.db|.edb" ), + DUMPA( COSDisk ), + DUMPA( COSFile ), DUMPA( COSFileFind ), DUMPA( COSFileSystem ), - DUMPAA( IOREQ, " [dumpall|norunstats]" ), + DUMPAA( IOREQ, "[dumpall|norunstats]" ), { "PAGE", &(CDUMPA::instance), "PAGE [a|b|h|t|*|2|4|8|16|32] - a=alloc map, b=binary dump, h=header, t=tags, *=all, 2/4/8/16/32=pagesize" }, DUMPA( CResource ), @@ -16117,7 +16117,7 @@ DEBUG_EXT( EDBGDumpDBDiskPage ) dprintf( "Error: Could not read global FMP variables for ifmp = %d.\n", ifmp ); goto HandleError; } - else if ( pgno < 1 ) + else if ( pgno < 1 ) // UNDONE: don't currently support dumping page header { dprintf( "Error: Invalid pgno.\n" ); goto HandleError; @@ -16180,10 +16180,6 @@ DEBUG_EXT( EDBGDumpDBDiskPage ) { VirtualFree( pbPage, 0, MEM_RELEASE ); } - if ( NULL != posf ) - { - Unfetch( posf ); - } } @@ -19131,8 +19127,6 @@ VOID CDUMPA::Dump( { DBFILEHDR * pdbfilehdrDebuggee = NULL; DBFILEHDR * pdbfilehdr = NULL; - COSFile * posf = NULL; - const BOOL fReadFromDisk = ( argc >= 1 || 0 == _stricmp( argv[ 0 ], ".disk" ) ); const CHAR * const szMemDump = "mem"; @@ -19150,81 +19144,24 @@ VOID CDUMPA::Dump( return; } - if ( fReadFromDisk ) + if ( FFetchVariable( pdbfilehdrDebuggee, &pdbfilehdr ) ) { - HANDLE hCurrentProcess; - ULONG64 ulCurrentProcess; - - const ULONG cbPage = Pdls()->CbPage(); - - if ( Pdls()->IfmpCurrent() == ifmpNil || Pdls()->IfmpCurrent() == 0 || - Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) == NULL || - cbPage == 0 ) - { - dprintf( "Something went wrong. To use .disk argument, must have an implicit IFMP set with !ese .db. Or we couldn't load the Pfmp cache or cbPage. (%d, 0x%p, %d)\n", - Pdls()->IfmpCurrent(), ( Pdls()->IfmpCurrent() != 0 && Pdls()->IfmpCurrent() != ifmpNil ) ? 
Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) : NULL, cbPage ); - goto HandleError; - } + const SIZE_T dwOffset = (BYTE *)pdbfilehdrDebuggee - (BYTE *)pdbfilehdr; - // UNDONE: currently assumes all databases are COSFile - // - if ( !FFetchVariable( (COSFile *)( Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) )->Pfapi(), &posf ) ) + dprintf( "[DBFILEHDR] 0x%p bytes @ 0x%N\n", + QWORD( sizeof( DBFILEHDR ) ), + pdbfilehdrDebuggee ); + if ( fMemDump ) { - dprintf( "Error: Could not read COSFile at 0x%N for specified FMP.\n", ( Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) )->Pfapi() ); - goto HandleError; + (VOID)( pdbfilehdr->Dump( CPRINTFWDBG::PcprintfInstance(), dwOffset ) ); } - - // VirtualAlloc() the buffer to ensure alignment - // - pdbfilehdr = (DBFILEHDR *)VirtualAlloc( NULL, cbPage, MEM_COMMIT, PAGE_READWRITE ); - if ( NULL == pdbfilehdr ) - { - dprintf( "Error: Could not allocate DBFILEHDR buffer (%d bytes) via VA !\n", cbPage ); - goto HandleError; - return; - } - - HRESULT hr = g_DebugSystemObjects->GetCurrentProcessHandle( &ulCurrentProcess ); - hCurrentProcess = (HANDLE) ulCurrentProcess; - if ( FAILED( hr ) ) - { - dprintf( "Failed to fetch process handle: %#x\n", hr ); - goto HandleError; - } - - if ( !FEDBGGetDbDiskPage( hCurrentProcess, posf->Handle(), (PGNO)-1 /* 0 would be shadow header */, (BYTE*)pdbfilehdr, cbPage ) ) - { - dprintf( "Failed to read from disk handle.\n" ); - goto HandleError; - } - dprintf( "Successfully read DBFILEHDR off the disk.\n" ); - if ( pdbfilehdr->le_filetype != JET_filetypeDatabase ) + else { - dprintf( "\nWARNING: The read DBFILEHDR doesn't have JET_filetypeDatabase. Corruption or maybe EBC is enabled. 
Dumping contents anyways.\n\n" ); + (VOID)( pdbfilehdr->DumpLite( CPRINTFWDBG::PcprintfInstance(), "\n", dwOffset ) ); } + + Unfetch( pdbfilehdr ); } - else if ( !FFetchVariable( pdbfilehdrDebuggee, &pdbfilehdr ) ) - { - dprintf( "Failed to fetch DBFILEHDR memory from debugger process.\n" ); - goto HandleError; - } - - const SIZE_T dwOffset = fReadFromDisk ? 0 : ( (BYTE *)pdbfilehdrDebuggee - (BYTE *)pdbfilehdr ); - - dprintf( "[DBFILEHDR] 0x%p bytes @ 0x%N\n", QWORD( sizeof( DBFILEHDR ) ), pdbfilehdrDebuggee ); - if ( fMemDump ) - { - (VOID)( pdbfilehdr->Dump( CPRINTFWDBG::PcprintfInstance(), dwOffset ) ); - } - else - { - (VOID)( pdbfilehdr->DumpLite( CPRINTFWDBG::PcprintfInstance(), "\n", dwOffset ) ); - } - -HandleError: - - fReadFromDisk ? VirtualFree( pdbfilehdr, 0, MEM_RELEASE ) : Unfetch( pdbfilehdr ); - Unfetch( posf ); } // TrxidStack dumping From f5dab9b94002b06890c01957d138239ca9ee39a6 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 13 Oct 2022 16:25:19 +0000 Subject: [PATCH 061/102] ESE Block Cache: Perf: reduce ICacheConfiguration::PctWrite calls We currently call this 3x per read request and 9x per write request in prod. This change caches PctWrite on the CRequest, reducing the calls to 1 per request. This also increases the consistency with which we treat a request wrt the settings. 
[Substrate:a17f7555e4f21008876d06a30afbf6b5c42214d9] --- dev/ese/src/os/blockcache/_cachebase.hxx | 3 +++ dev/ese/src/os/blockcache/_hashedlrukcache.hxx | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dev/ese/src/os/blockcache/_cachebase.hxx b/dev/ese/src/os/blockcache/_cachebase.hxx index 3842f6bc..78645892 100644 --- a/dev/ese/src/os/blockcache/_cachebase.hxx +++ b/dev/ese/src/os/blockcache/_cachebase.hxx @@ -171,6 +171,7 @@ class TCacheBase // c m_cp( cp ), m_pfnComplete( pfnComplete ), m_keyComplete( keyComplete ), + m_pctWrite( max( 0, min( 100, pc->Pcconfig()->PctWrite() ) ) ), m_cref( 1 ), m_err( JET_errSuccess ), m_grbitQOSComplete( 0 ), @@ -199,6 +200,7 @@ class TCacheBase // c const BYTE* const PbData() const { return m_pbData; } OSFILEQOS GrbitQOS() const { return m_grbitQOS; } ICache::CachingPolicy Cp() const { return m_cp; } + double PctWrite() const { return m_pctWrite; } BOOL FSync() const { return m_pfnComplete == NULL; } ERR ErrRead( _In_ IFileFilter* const pff, @@ -496,6 +498,7 @@ class TCacheBase // c const ICache::CachingPolicy m_cp; const ICache::PfnComplete m_pfnComplete; const DWORD_PTR m_keyComplete; + const double m_pctWrite; FullTraceContext m_ftc; volatile int m_cref; ERR m_err; diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 3dffc3cf..7c7c6505 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -150,6 +150,7 @@ class THashedLRUKCache const BYTE* const PbData() const { return THashedLRUKCacheBase::CRequest::PbData(); } OSFILEQOS GrbitQOS() const { return THashedLRUKCacheBase::CRequest::GrbitQOS(); } ICache::CachingPolicy Cp() const { return THashedLRUKCacheBase::CRequest::Cp(); } + double PctWrite() const { return THashedLRUKCacheBase::CRequest::PctWrite(); } ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } typename 
CHashedLRUKCachedFileTableEntry::CIORangeLockBase* Piorl() { return &m_iorl; } @@ -1852,7 +1853,7 @@ class THashedLRUKCache m_cbRequested( cbRequested ), m_fOverrideCachePercentage( fOverrideCachePercentage ), m_cbTotal( cbTotal ), - m_pctWrite( max( 0, min( 100, m_pc->Pcconfig()->PctWrite() ) ) ), + m_pctWrite( prequest->PctWrite() ), m_cbWriteCacheMax( (QWORD)( m_cbTotal * m_pctWrite / 100 ) ), m_cbReadCacheMax( m_cbTotal - m_cbWriteCacheMax ), m_cbWriteCache( cbWriteCache ), @@ -8815,7 +8816,7 @@ void THashedLRUKCache::RequestRead( _In_ CRequest* const preq // determine if we should cache this request - const BOOL fCacheIfPossible = prequest->Cp() != cpDontCache && Pcconfig()->PctWrite() < 100; + const BOOL fCacheIfPossible = prequest->Cp() != cpDontCache && prequest->PctWrite() < 100; // loop through the read by cached block potentially crossing many cached file blocks @@ -8942,7 +8943,7 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const // determine if we should cache this request - const BOOL fCacheRequestIfPossible = prequest->Cp() != cpDontCache && Pcconfig()->PctWrite() < 100; + const BOOL fCacheRequestIfPossible = prequest->Cp() != cpDontCache && prequest->PctWrite() < 100; // loop through the read by cached block potentially crossing many cached file blocks @@ -9113,8 +9114,8 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // NOTE: we do not cache writes to sparse regions of a file to force them to be reallocated. 
this is // required to maintain file meta-data parity with uncached files - const BOOL fCacheRequestIfPossible = ( prequest->Cp() != cpDontCache && - Pcconfig()->PctWrite() > 0 && + const BOOL fCacheRequestIfPossible = ( prequest->Cp() != cpDontCache && + prequest->PctWrite() > 0 && !prequest->Pcfte()->FSparse( ibCachedBlock, cbCachedBlock ) ) || prequest->Cp() == cpPinned; From b9e8fdab091e05c9ba941d0541d12c75d99f17e2 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 14 Oct 2022 15:29:08 +0000 Subject: [PATCH 062/102] ESE: fix RFS for COSFile::ErrFlushFileBuffers [Substrate:2b8f34ab2627d2780448a6131f0ff7214de06eef] --- dev/ese/src/os/osfile.cxx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/ese/src/os/osfile.cxx b/dev/ese/src/os/osfile.cxx index 2152daa4..5aca3413 100644 --- a/dev/ese/src/os/osfile.cxx +++ b/dev/ese/src/os/osfile.cxx @@ -825,7 +825,10 @@ ERR COSFile::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const File if ( !fFaultedFlushSucceeded || !NT_SUCCESS( status = g_pfnNtFlushBuffersFileEx( m_hFile, flags, NULL, 0, &iosb ) ) ) { - SetLastError( g_pfnRtlNtStatusToDosError( status ) ); + if ( fFaultedFlushSucceeded ) + { + SetLastError( g_pfnRtlNtStatusToDosError( status ) ); + } error = GetLastError(); err = ErrOSFileIFromWinError( error ); Assert( ERROR_IO_PENDING != error ); // not bad, just unexpected From dd57d9c0a74d3f1f42bd8c6a119b63f3c12bb9ff Mon Sep 17 00:00:00 2001 From: Umair Ahmad Date: Sun, 16 Oct 2022 20:54:19 +0000 Subject: [PATCH 063/102] Fix various failures in ValidateTraceContextInEtwEvents test. 1. Cap number of ops generated by dbmaintstress.exe to prevent it from generating too big of an ETW trace. 2. Fix dbmaintstress to stop complaining about failed tasks when the number of tasks are too low. 
[Substrate:9257fe0b8dc61a98267a05e7aad203cff85c738c] --- dev/ese/src/ese/fcreate.cxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/ese/src/ese/fcreate.cxx b/dev/ese/src/ese/fcreate.cxx index d2ef7e31..fada7fb9 100644 --- a/dev/ese/src/ese/fcreate.cxx +++ b/dev/ese/src/ese/fcreate.cxx @@ -3015,6 +3015,8 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, return JET_errSuccess; HandleError: + Assert( err != JET_errKeyDuplicate ); // should return JET_errTableDuplicate + OSTraceFMP( ifmp, JET_tracetagDDLWrite, From c46f7fe0220ac07b31d1fd341ccc3b89ac35bed6 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 18 Oct 2022 15:12:57 +0000 Subject: [PATCH 064/102] ESE Block Cache: TVS fixes for Windows The last one is a bug if the CBucket is > 32 bits which it is. I am not sure why this doesn't currently cause a failure. [Substrate:140a35fab714e4f3ec8a5549ba90367e2d88b305] --- dev/ese/src/os/blockcache/_hashedlrukcache.hxx | 2 +- .../src/os/blockcache/_hashedlrukcacheslab.hxx | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 7c7c6505..a580dd07 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -4469,7 +4469,7 @@ class THashedLRUKCache void SetFingerprint( _In_ const QWORD qwFingerprints, _In_ const size_t i, _In_ const WORD wFingerprint ) { - QWORD qwFingerprintsT = qwFingerprints & ~( QWORD( WFingerprintMask() << ( i * CbitFingerprint() ) ) ); + QWORD qwFingerprintsT = qwFingerprints & ~( QWORD( WFingerprintMask() ) << ( i * CbitFingerprint() ) ); qwFingerprintsT = qwFingerprintsT | ( QWORD( wFingerprint & WFingerprintMask() ) << ( i * CbitFingerprint() ) ); memcpy( m_rgbFingerprint, &qwFingerprintsT, sizeof( m_rgbFingerprint ) ); diff --git a/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx 
b/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx index e00ac304..2211a74b 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx @@ -2861,15 +2861,15 @@ int TCachedBlockSlab::CompareSlotsForInit( _In_ const ISlot& islotA, _In_ con const BOOL fValidA = pcblA->FValid(); const BOOL fValidB = pcblB->FValid(); - if ( !fValidA & fValidB ) + if ( ( !fValidA ) & fValidB ) { return -1; } - if ( fValidA & !fValidB ) + if ( fValidA & ( !fValidB ) ) { return 1; } - if ( !fValidA & !fValidB ) + if ( ( !fValidA ) & ( !fValidB ) ) { if ( islotA < islotB ) { @@ -2962,15 +2962,15 @@ int TCachedBlockSlab::CompareSlotsForEvict( _In_ const ISlot& islotA, _In_ co const TouchNumber tono0A = pcblA->Tono0(); const TouchNumber tono0B = pcblB->Tono0(); - if ( !fValidA & fValidB ) + if ( ( !fValidA ) & fValidB ) { return -1; } - if ( fValidA & !fValidB ) + if ( fValidA & ( !fValidB ) ) { return 1; } - if ( !fValidA & !fValidB ) + if ( ( !fValidA ) & ( !fValidB ) ) { if ( tono0A == tonoInvalid && tono0B != tonoInvalid ) { @@ -3008,11 +3008,11 @@ int TCachedBlockSlab::CompareSlotsForEvict( _In_ const ISlot& islotA, _In_ co const BOOL fSupercededA = m_rgfSlotSuperceded[ (size_t)islotA ]; const BOOL fSupercededB = m_rgfSlotSuperceded[ (size_t)islotB ]; - if ( fSupercededA & !fSupercededB ) + if ( fSupercededA & ( !fSupercededB ) ) { return -1; } - if ( !fSupercededA & fSupercededB ) + if ( ( !fSupercededA ) & fSupercededB ) { return 1; } From 2875a4158875858151d51d6d1eeb953ebfa0e026 Mon Sep 17 00:00:00 2001 From: Nathanael Cheriere Date: Tue, 18 Oct 2022 18:03:01 +0000 Subject: [PATCH 065/102] Added control of ESE's cache trace subsampling Added the keyword BFRESMGRSUBSAMPLING to control the subsampling of ESE's cache traces. Traces are subsampled only if the keyword BFRESMGRSUBSAMPLED is set but keywords BFRESMGR, BF, DataWorkingSet, and Performance are not. 
Moreover, if the sampling ratio is set to 0, no trace is emitted in order to trace only specific databases. [Substrate:cfdd1f5d7bd2377a54bf2a104e52cb20b937a728] --- dev/ese/published/inc/os/oseventtrace.g.hxx | 1 + dev/ese/published/inc/os/oseventtrace.hxx | 6 +- dev/ese/src/_etw/EseEtwEventsPregen.txt | 20 ++--- dev/ese/src/_etw/Microsoft-ETW-ESE.mc | 29 ++++--- dev/ese/src/ese/bf.cxx | 91 +++++++++++++-------- dev/ese/src/ese/sysparamtable.g.cxx | 2 +- dev/ese/src/os/oseventtrace.cxx | 17 ++++ 7 files changed, 110 insertions(+), 56 deletions(-) diff --git a/dev/ese/published/inc/os/oseventtrace.g.hxx b/dev/ese/published/inc/os/oseventtrace.g.hxx index f365af33..92f490b7 100644 --- a/dev/ese/published/inc/os/oseventtrace.g.hxx +++ b/dev/ese/published/inc/os/oseventtrace.g.hxx @@ -110,6 +110,7 @@ enum OSEventTraceKeywordGUID : ULONGLONG _etguidKeywordIOEX = 0x0000000000008000, _etguidKeywordIOSESS = 0x0000000000010000, _etguidKeywordSubstrateTelemetry = 0x0000000000020000, + _etguidKeywordBFRESMGRSUBSAMPLED = 0x0000000000040000, _etguidKeywordCompressExp = 0x0000000100000000, }; diff --git a/dev/ese/published/inc/os/oseventtrace.hxx b/dev/ese/published/inc/os/oseventtrace.hxx index 6d4bf27c..3ba0cab2 100644 --- a/dev/ese/published/inc/os/oseventtrace.hxx +++ b/dev/ese/published/inc/os/oseventtrace.hxx @@ -26,6 +26,8 @@ INLINE BOOL FOSEventTraceEnabled(); template< OSEventTraceKeywordGUID etguid > INLINE BOOL FOSEventTraceKeywordEnabled(); +INLINE BOOL FOSEventTraceAnyKeywordEnabled( const ULONGLONG ullKeywordMask ); + // The first 8 are generic reasons, resused per event, the next 248 are for whatever enum TraceStationIdentificationReason : BYTE // tsidr @@ -140,8 +142,8 @@ template INLINE BOOL COSEventTraceIdCheck::FAnnounceTime< _etguidSysStationId >( template INLINE BOOL COSEventTraceIdCheck::FAnnounceTime< _etguidIsamDbfilehdrInfo >( const TraceStationIdentificationReason tsidr ); template INLINE BOOL COSEventTraceIdCheck::FAnnounceTime< _etguidFmpStationId >( 
const TraceStationIdentificationReason tsidr ); -// Used to avoid subsampling if the keyword BFRESMGR is not set -template INLINE BOOL FOSEventTraceKeywordEnabled< _etguidKeywordBFRESMGR >(); +// Used to down sample the cache trace if only the keyword BFRESMGRSUBSAMPLED is set. +template INLINE BOOL FOSEventTraceKeywordEnabled< _etguidKeywordBFRESMGRSUBSAMPLED >(); #endif // _OS_EVENT_TRACE_HXX_INCLUDED diff --git a/dev/ese/src/_etw/EseEtwEventsPregen.txt b/dev/ese/src/_etw/EseEtwEventsPregen.txt index cb52cfa7..9d3449a5 100644 --- a/dev/ese/src/_etw/EseEtwEventsPregen.txt +++ b/dev/ese/src/_etw/EseEtwEventsPregen.txt @@ -40,11 +40,11 @@ START_TRACE_LIST: CacheNewPage, 103, win:Informational, "BF Performance DataWorkingSet", CacheReadPage, 104, win:Informational, "BF Performance DataWorkingSet", CachePrereadPage, 105, win:Informational, "BF Performance DataWorkingSet", - CacheWritePage, 106, win:Informational, "BF BFRESMGR Performance DataWorkingSet", - CacheEvictPage, 107, win:Informational, "BF BFRESMGR DataWorkingSet", - CacheRequestPage, 108, win:Verbose, "BF BFRESMGR" - LatchPageDeprecated, 109, win:Informational, "BF BFRESMGR Performance", fDeprecated - CacheDirtyPage, 110, win:Verbose, "BF BFRESMGR Performance" + CacheWritePage, 106, win:Informational, "BF BFRESMGR BFRESMGRSUBSAMPLED Performance DataWorkingSet", + CacheEvictPage, 107, win:Informational, "BF BFRESMGR BFRESMGRSUBSAMPLED DataWorkingSet", + CacheRequestPage, 108, win:Verbose, "BF BFRESMGR BFRESMGRSUBSAMPLED" + LatchPageDeprecated, 109, win:Informational, "BF BFRESMGR BFRESMGRSUBSAMPLED Performance", fDeprecated + CacheDirtyPage, 110, win:Verbose, "BF BFRESMGR BFRESMGRSUBSAMPLED Performance" TransactionBegin, 111, win:Verbose, "Transaction" TransactionCommit, 112, win:Verbose, "Transaction" TransactionRollback, 113, win:Verbose, "Transaction" @@ -80,10 +80,10 @@ START_TRACE_LIST: CacheScavengeProgress, 143, win:Informational, "BF Performance", ApiCall_Start, 144, win:Informational, "Performance", 
ApiCall_Stop, 145, win:Informational, "Performance", fStopCodeNoTask - ResMgrInit, 146, win:Informational, "BFRESMGR", - ResMgrTerm, 147, win:Informational, "BFRESMGR", - CacheCachePage, 148, win:Verbose, "BF BFRESMGR", - MarkPageAsSuperCold, 149, win:Verbose, "BF BFRESMGR", + ResMgrInit, 146, win:Informational, "BFRESMGR BFRESMGRSUBSAMPLED", + ResMgrTerm, 147, win:Informational, "BFRESMGR BFRESMGRSUBSAMPLED", + CacheCachePage, 148, win:Verbose, "BF BFRESMGR BFRESMGRSUBSAMPLED", + MarkPageAsSuperCold, 149, win:Verbose, "BF BFRESMGR BFRESMGRSUBSAMPLED", CacheMissLatency, 150, win:Informational, "BF StallLatencies", BTreePrereadPageRequest, 151, win:Informational, "Performance", DiskFlushFileBuffers, 152, win:Informational, "IO", @@ -110,7 +110,7 @@ START_TRACE_LIST: IOThreadIssueProcessedIO, 173, win:Informational, "IO" IOIoreqCompletion, 174, win:Informational, "IO StallLatencies" CacheMemoryUsage, 175, win:Informational, "SubstrateTelemetry" - CacheSetLgposModify, 176, win:Verbose, "BF BFRESMGR" + CacheSetLgposModify, 176, win:Verbose, "BF BFRESMGR BFRESMGRSUBSAMPLED" START_TRACE_DEFNS: diff --git a/dev/ese/src/_etw/Microsoft-ETW-ESE.mc b/dev/ese/src/_etw/Microsoft-ETW-ESE.mc index 4d900ba4..ccedf46a 100644 --- a/dev/ese/src/_etw/Microsoft-ETW-ESE.mc +++ b/dev/ese/src/_etw/Microsoft-ETW-ESE.mc @@ -201,6 +201,11 @@ ESE_PRE_GEN_BASE_FILE: be pre-processed with eseetw.pl. message="$(string.Keyword.SubstrateTelemetry)" name="SubstrateTelemetry" /> + + () ) || ( g_ulSamplingRatio <= 1 ) ) + return ( ( g_ulSamplingRatio == 0 ) && + !FOSEventTraceAnyKeywordEnabled( g_ullSamplingKeywordMask ) ); +} + +// Subsample the cache trace by preventing some events from being emitted. +// Subsampling is only possible if no other keyword with events in common with BFRESMGRSUBSAMPLED are set and BFRESMGRSUBSAMPLED is set. +// If g_ulSamplingRatio is set to 0, no events are emitted. +// With g_ulSamplingRatio > 0, only the events for 1 in g_ulSamplingRatio pages are traced on average. 
+INLINE bool FBFIDoNotEmitBfResMgrPageTrace( const IFMP ifmp, const PGNO pgno ) +{ + if ( ( g_ulSamplingRatio == 1 ) || FOSEventTraceAnyKeywordEnabled( g_ullSamplingKeywordMask ) ) + { + return fFalse; + } + + if ( ( g_ulSamplingRatio == 0 ) || !FOSEventTraceKeywordEnabled<_etguidKeywordBFRESMGRSUBSAMPLED>() ) { - return true; + return fTrue; } - return ( ( ( IFMPPGNO( ifmp, pgno ).Hash() + g_ulSamplingSeed ) % g_ulSamplingRatio ) == 0 ); + + return ( ( ( IFMPPGNO( ifmp, pgno ).Hash() ) % g_ulSamplingRatio ) != 0 ); } + INLINE void BFITraceResMgrInit( const INT K, const double csecCorrelatedTouch, @@ -6304,35 +6323,41 @@ INLINE void BFITraceResMgrInit( const double dblHashUniformity, const double dblSpeedSizeTradeoff ) { + if ( !FBFIDoNotEmitBfResMgrInitTermTrace() ) + { #ifdef ENABLE_BFFTL_TRACING - (void)ErrBFIFTLSysResMgrInit( - K, - csecCorrelatedTouch, - csecTimeout, - csecUncertainty, - dblHashLoadFactor, - dblHashUniformity, - dblSpeedSizeTradeoff ); + ( void )ErrBFIFTLSysResMgrInit( + K, + csecCorrelatedTouch, + csecTimeout, + csecUncertainty, + dblHashLoadFactor, + dblHashUniformity, + dblSpeedSizeTradeoff ); #endif // ENABLE_BFFTL_TRACING - ETResMgrInit( - TickOSTimeCurrent(), - K, - csecCorrelatedTouch, - csecTimeout, - csecUncertainty, - dblHashLoadFactor, - dblHashUniformity, - dblSpeedSizeTradeoff ); + ETResMgrInit( + TickOSTimeCurrent(), + K, + csecCorrelatedTouch, + csecTimeout, + csecUncertainty, + dblHashLoadFactor, + dblHashUniformity, + dblSpeedSizeTradeoff ); + } } INLINE void BFITraceResMgrTerm() { + if ( !FBFIDoNotEmitBfResMgrInitTermTrace() ) + { #ifdef ENABLE_BFFTL_TRACING - (void)ErrBFIFTLSysResMgrTerm(); + ( void )ErrBFIFTLSysResMgrTerm(); #endif // ENABLE_BFFTL_TRACING - ETResMgrTerm( TickOSTimeCurrent() ); + ETResMgrTerm( TickOSTimeCurrent() ); + } } INLINE void BFITraceCachePage( @@ -6344,7 +6369,7 @@ INLINE void BFITraceCachePage( const BFRequestTraceFlags bfrtf, const TraceContext& tc ) { - if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) 
) + if ( !FBFIDoNotEmitBfResMgrPageTrace( pbf->ifmp, pbf->pgno ) ) { GetCurrUserTraceContext getutc; const BYTE bClientType = getutc->context.nClientType; @@ -6373,7 +6398,7 @@ INLINE void BFITraceRequestPage( const BFRequestTraceFlags bfrtf, const TraceContext& tc ) { - if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + if ( !FBFIDoNotEmitBfResMgrPageTrace( pbf->ifmp, pbf->pgno ) ) { #ifdef ENABLE_BFFTL_TRACING GetCurrUserTraceContext getutc; @@ -6421,7 +6446,7 @@ INLINE void BFITraceMarkPageAsSuperCold( const IFMP ifmp, const PGNO pgno ) { - if ( FBFISamplePage( ifmp, pgno ) ) + if ( !FBFIDoNotEmitBfResMgrPageTrace( ifmp, pgno ) ) { #ifdef ENABLE_BFFTL_TRACING ( void )ErrBFIFTLMarkAsSuperCold( ifmp, pgno ); @@ -6438,7 +6463,7 @@ INLINE void BFITraceEvictPage( const ERR errBF, const ULONG bfef ) { - if ( FBFISamplePage( ifmp, pgno ) ) + if ( !FBFIDoNotEmitBfResMgrPageTrace( ifmp, pgno ) ) { const ULONG pctPriority = 0; // Not relevant for eviction anymore. @@ -6458,7 +6483,7 @@ INLINE void BFITraceDirtyPage( const TraceContext& tc ) { - if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + if ( !FBFIDoNotEmitBfResMgrPageTrace( pbf->ifmp, pbf->pgno ) ) { auto tick = TickOSTimeCurrent(); static_assert( sizeof( tick ) == sizeof( DWORD ), "Compiler magic failing." ); @@ -6552,7 +6577,7 @@ INLINE void BFITraceSetLgposModify( const PBF pbf, const LGPOS& lgposModify ) { - if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + if ( !FBFIDoNotEmitBfResMgrPageTrace( pbf->ifmp, pbf->pgno ) ) { auto tick = TickOSTimeCurrent(); static_assert( sizeof( tick ) == sizeof( DWORD ), "Compiler magic failing." 
); @@ -6579,7 +6604,7 @@ INLINE void BFITraceWritePage( const PBF pbf, const FullTraceContext& tc ) { - if ( FBFISamplePage( pbf->ifmp, pbf->pgno ) ) + if ( !FBFIDoNotEmitBfResMgrPageTrace( pbf->ifmp, pbf->pgno ) ) { const ULONG bfdfTrace = (ULONG) pbf->bfdf; // We need to put this on the stack because & isn't valid on a bitfield auto tick = TickOSTimeCurrent(); diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index 89ac3ace..60c4e679 100644 --- a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -198,7 +198,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramHungIOActions, CJetParam::typeInteger, 1, 1, 1, 1, JET_bitNil, (JET_bitHungIOEvent|JET_bitHungIOCancel|JET_bitHungIODebug|JET_bitHungIOEnforce|JET_bitHungIOTimeout), JET_bitHungIOEvent), NORMAL_PARAM(JET_paramMinDataForXpress, CJetParam::typeInteger, 1, 1, 1, 1, 0, 2147483647, 1024), CUSTOM_PARAM3(JET_paramEnableShrinkDatabase, CJetParam::typeGrbit, 0, 0, 0, 1, 0, 0xffff, JET_paramEnableShrinkDatabase_DEFAULT, JET_paramEnableShrinkDatabase_DEFAULT, CJetParam::GetInteger, SetShrinkDatabaseParam, CJetParam::CloneDefault), - CUSTOM_PARAM3(JET_paramFlight_CacheTraceSamplingRatio, CJetParam::typeInteger, 0, 1, 0, 0, 1, 2147483647, 1, 1, CJetParam::GetInteger, SetCacheTraceSamplingRatio, CJetParam::CloneDefault), + CUSTOM_PARAM3(JET_paramFlight_CacheTraceSamplingRatio, CJetParam::typeInteger, 0, 1, 0, 0, 0, 2147483647, 1, 1, CJetParam::GetInteger, SetCacheTraceSamplingRatio, CJetParam::CloneDefault), NORMAL_PARAM(JET_paramProcessFriendlyName, CJetParam::typeString, 0, 1, 1, 1, 0, JET_cbNameMost, L""), NORMAL_PARAM(JET_paramDurableCommitCallback, CJetParam::typePointer, 1, 0, 0, 1, 0, -1, NULL), IGNORED_PARAM(JET_paramEnableSqm, CJetParam::typeInteger, 0, 0, 0, 1, JET_sqmDisable, JET_sqmFromCEIP, JET_sqmEnable), diff --git a/dev/ese/src/os/oseventtrace.cxx b/dev/ese/src/os/oseventtrace.cxx index 5374a87e..dfc9cbc5 100644 --- 
a/dev/ese/src/os/oseventtrace.cxx +++ b/dev/ese/src/os/oseventtrace.cxx @@ -200,6 +200,7 @@ ERR ErrOSEventTraceInit() // Needed to make everything compile. Harmless. (void)FOSEventTraceEnabled(); + (void)FOSEventTraceAnyKeywordEnabled( 0 ); return JET_errSuccess; } @@ -283,3 +284,19 @@ INLINE BOOL FOSEventTraceKeywordEnabled() return ( etguid & p->MatchAnyKeyword ); } + +INLINE BOOL FOSEventTraceAnyKeywordEnabled( const ULONGLONG ullKeywordMask ) +{ + if ( g_fDisableTracingForced ) + { + return fFalse; + } + +#ifdef ESENT + MCGEN_TRACE_CONTEXT* p = &Microsoft_Windows_ESE_Context; +#else + MCGEN_TRACE_CONTEXT* p = &Microsoft_Exchange_ESE_Context; +#endif + + return ( ( ullKeywordMask & p->MatchAnyKeyword ) != 0 ); +} From 0cb7cd5c76b1c4b94462d899a1ca94a715827575 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 18 Oct 2022 23:20:30 +0000 Subject: [PATCH 066/102] ESE: Stability: fix hang during sync scavenge If we are doing a sync clean and things get so bad that we are at max scavenge priority then we will start to issue async write back IO at qosIODispatchImmediate. The problem with this is that these requests can block rather than be rejected with errDiskTilt. Normally, if we get errDiskTilt, we will call CBFIssueList::ErrIssue to release any pending work to avoid a hang. In this case, we don't do that so a hang can occur. The fix is to detect when we are escaling to qosIODispatchImmediate and call CBFIssueList::ErrIssue first before requesting the async write back. This was found while running retail stress with EBC enabled. [Substrate:017b2a98ec34e430b305868f30d1ec729caed277] --- dev/ese/src/ese/bf.cxx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index d7fef97b..79edf743 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -11218,8 +11218,14 @@ ERR ErrBFIMaintScavengeIScavengePages( const char* const szContextTraceOnly, con } // Async-flush this page. 
+ // + // NOTE: this can block if ulScavengeWriteSev == ulScavengeWriteMax which would give us qosIODispatchImmediate const IOREASON ior = IOR( ( ( bfefReason == bfefReasonShrink ) ? iorpBFShrink : iorpBFAvailPool ), fSync ? iorfForeground : iorfNone ); const OSFILEQOS qos = QosBFIMaintScavengePages( UlParam( PinstFromIfmp( pbf->ifmp ), JET_paramIOPriority ), ulScavengeWriteSev ); + if ( ( qos & qosIODispatchMask ) == qosIODispatchImmediate ) + { + (void)bfil.ErrIssue( fFalse ); + } const ERR errFlush = ErrBFIFlushPage( pbf, ior, qos, bfdfDirty, fFalse /* fOpportune */, &fPermanentErr ); // Count the number of latched pages we see. From db009476461d9334561ce17256bf0cd6de1dc04a Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Wed, 19 Oct 2022 04:02:11 +0000 Subject: [PATCH 067/102] BCrypt/CNG based implementation of AES256 compatible with the current CAPI implementation CAPI is a wrapper around CNG so calling CNG directly will be more efficient Use dynamic load to avoid adding lib in ~200 vcxproj's Only exercised in oslayerunit for now, will add to ESE (flighted) next [Substrate:9245020601326aaf969b2fe3d4994c9c182ba1a8] --- dev/ese/published/inc/os/encrypt.hxx | 31 +- dev/ese/src/ese/_osu/encryptu.cxx | 4 +- dev/ese/src/ese/info.cxx | 4 +- dev/ese/src/ese/jetapi.cxx | 2 +- dev/ese/src/os/CMakeLists.txt | 1 + dev/ese/src/os/edbg.cxx | 1 + dev/ese/src/os/encrypt.cxx | 158 ++++++-- dev/ese/src/os/encrypt2.cxx | 368 ++++++++++++++++++ dev/ese/src/os/library.cxx | 3 +- dev/ese/src/os/osfile.cxx | 2 +- dev/ese/src/os/osstd_.hxx | 4 +- .../oslayer/oslayerunit/encryptiontest.cxx | 145 +++---- 12 files changed, 592 insertions(+), 131 deletions(-) create mode 100644 dev/ese/src/os/encrypt2.cxx diff --git a/dev/ese/published/inc/os/encrypt.hxx b/dev/ese/published/inc/os/encrypt.hxx index 0d82c89e..9df20f87 100644 --- a/dev/ese/published/inc/os/encrypt.hxx +++ b/dev/ese/published/inc/os/encrypt.hxx @@ -9,30 +9,41 @@ Crc32Checksum( _In_reads_bytes_( cbData ) const BYTE *pbData, 
_In_ ULONG cbData ); +ULONG CbOSEncryptAes256SizeNeeded( ULONG cbDataLen ); + +enum AES256_IMPLEMENTATION +{ + AES256_CAPI_IMPLEMENTATION, + AES256_CNG_IMPLEMENTATION +}; + ERR ErrOSCreateAes256Key( + _In_ AES256_IMPLEMENTATION impl, _Out_writes_bytes_to_opt_(*pcbKeySize, *pcbKeySize) BYTE *pbKey, _Inout_ ULONG *pcbKeySize ); -ULONG CbOSEncryptAes256SizeNeeded( ULONG cbDataLen ); - -ERR ErrOSEncryptionVerifyKey( - _In_reads_bytes_(cbKey) const BYTE *pbKey, - _In_ ULONG cbKey ); +ERR +ErrOSEncryptionVerifyKey( + _In_ AES256_IMPLEMENTATION impl, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ); // Encrypt using AES256 encryption in CBC mode with PKCS5 padding. // Also, there is initial padding for checksum, InitVector in the output data. // Use CbOSEncryptAes256SizeNeeded above to figure out how big the output buffer needs to be. ERR ErrOSEncryptWithAes256( - _Inout_updates_bytes_to_(cbDataBufLen, *pcbDataLen) BYTE *pbData, - _Inout_ ULONG *pcbDataLen, - _In_ ULONG cbDataBufLen, - _In_reads_bytes_(cbKey) const BYTE *pbKey, - _In_ ULONG cbKey ); + _In_ AES256_IMPLEMENTATION impl, + _Inout_updates_bytes_to_(cbDataBufLen, *pcbDataLen) BYTE *pbData, + _Inout_ ULONG *pcbDataLen, + _In_ ULONG cbDataBufLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ); ERR ErrOSDecryptWithAes256( + _In_ AES256_IMPLEMENTATION impl, _In_reads_( *pcbDataLen ) BYTE *pbDataIn, _Out_writes_bytes_to_(*pcbDataLen, *pcbDataLen) BYTE *pbDataOut, _Inout_ ULONG *pcbDataLen, diff --git a/dev/ese/src/ese/_osu/encryptu.cxx b/dev/ese/src/ese/_osu/encryptu.cxx index 137c37d9..731059b0 100644 --- a/dev/ese/src/ese/_osu/encryptu.cxx +++ b/dev/ese/src/ese/_osu/encryptu.cxx @@ -75,7 +75,7 @@ ErrOSUEncrypt( PERFOpt( cEncryptionCalls.Inc( iInstance, tce ) ); const HRT hrtStart = HrtHRTCount(); - const ERR err = ErrOSEncryptWithAes256( pbData, pcbDataLen, cbDataBufLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); + const ERR err = ErrOSEncryptWithAes256( 
AES256_CAPI_IMPLEMENTATION, pbData, pcbDataLen, cbDataBufLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); PERFOpt( cEncryptionTotalDhrts.Add( iInstance, tce, HrtHRTCount() - hrtStart ) ); return err; @@ -100,7 +100,7 @@ ErrOSUDecrypt( PERFOpt( cDecryptionCalls.Inc( iInstance, tce ) ); const HRT hrtStart = HrtHRTCount(); - const ERR err = ErrOSDecryptWithAes256( pbDataIn, pbDataOut, pcbDataLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); + const ERR err = ErrOSDecryptWithAes256( AES256_CAPI_IMPLEMENTATION, pbDataIn, pbDataOut, pcbDataLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); PERFOpt( cDecryptionTotalDhrts.Add( iInstance, tce, HrtHRTCount() - hrtStart ) ); if ( err == JET_errDecryptionFailed ) diff --git a/dev/ese/src/ese/info.cxx b/dev/ese/src/ese/info.cxx index 27fb8d22..c7d6f571 100644 --- a/dev/ese/src/ese/info.cxx +++ b/dev/ese/src/ese/info.cxx @@ -1273,7 +1273,7 @@ ERR VTAPI ErrIsamSetTableInfo( FUCBRemoveEncryptionKey( pfucb ); if ( cbParam > 0 ) { - err = ErrOSEncryptionVerifyKey( (BYTE*)pvParam, cbParam ); + err = ErrOSEncryptionVerifyKey( AES256_CAPI_IMPLEMENTATION, (BYTE*)pvParam, cbParam ); if ( err < JET_errSuccess ) { AssertSz( fFalse, "Client is giving us a bad key" ); @@ -1606,7 +1606,7 @@ ERR VTAPI ErrIsamGetTableInfo( #ifdef DEBUG if ( pfucb->cbEncryptionKey > 0 ) { - ERR errT = ErrOSEncryptionVerifyKey( pfucb->pbEncryptionKey, pfucb->cbEncryptionKey ); + ERR errT = ErrOSEncryptionVerifyKey( AES256_CAPI_IMPLEMENTATION, pfucb->pbEncryptionKey, pfucb->cbEncryptionKey ); if ( errT < JET_errSuccess ) { AssertSz( fFalse, "Client should not have been able to save a bad encryption key" ); diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 425c01a2..5a924747 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -9530,7 +9530,7 @@ LOCAL JET_ERR JET_API JetCreateEncryptionKeyEx( return ErrERRCheck( JET_errInvalidParameter ); } *pcbActual = cbKey; - return 
ErrOSCreateAes256Key( (BYTE*)pvKey, pcbActual ); + return ErrOSCreateAes256Key( AES256_CAPI_IMPLEMENTATION, (BYTE*)pvKey, pcbActual ); } JET_ERR JET_API JetCreateEncryptionKey( diff --git a/dev/ese/src/os/CMakeLists.txt b/dev/ese/src/os/CMakeLists.txt index fa1b98c9..e3e5d9d1 100644 --- a/dev/ese/src/os/CMakeLists.txt +++ b/dev/ese/src/os/CMakeLists.txt @@ -41,6 +41,7 @@ set(OS_SOURCES ${ESE_DEV}/src/os/dllentry.cxx ${ESE_DEV}/src/os/edbg.cxx ${ESE_DEV}/src/os/encrypt.cxx + ${ESE_DEV}/src/os/encrypt2.cxx ${ESE_DEV}/src/os/error.cxx ${ESE_DEV}/src/os/event.cxx ${ESE_DEV}/src/os/hapublish.cxx diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index c81f26bc..cad22037 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -16456,6 +16456,7 @@ DEBUG_EXT( EDBGDecrypt ) } err = ErrOSDecryptWithAes256( + AES256_CAPI_IMPLEMENTATION, pbBuffer, pbDecrypted, &cbDecrypted, diff --git a/dev/ese/src/os/encrypt.cxx b/dev/ese/src/os/encrypt.cxx index 4a36510b..d75a1b10 100644 --- a/dev/ese/src/os/encrypt.cxx +++ b/dev/ese/src/os/encrypt.cxx @@ -185,7 +185,6 @@ Crc32Checksum( HCRYPTPROV g_hAESProv = NULL; -CCriticalSection g_critAESProv( CLockBasicInfo( CSyncBasicInfo( "g_critAESProv" ), rankAESProv, 0 ) ); #define BlockSizeAes256 16 BOOL FOSEncryptionPreinit() @@ -205,32 +204,17 @@ ErrOSEncryptionInit() ERR ErrOSIAESProviderInit() { - if ( g_hAESProv != NULL ) - { - return JET_errSuccess; - } - - g_critAESProv.Enter(); - - if ( g_hAESProv != NULL ) + if ( !CryptAcquireContextW( &g_hAESProv, NULL, MS_ENH_RSA_AES_PROV_W, PROV_RSA_AES, CRYPT_VERIFYCONTEXT ) ) { - g_critAESProv.Leave(); - return JET_errSuccess; - } - - HCRYPTPROV hAESProv = NULL; - if ( !CryptAcquireContextW( &hAESProv, NULL, MS_ENH_RSA_AES_PROV_W, PROV_RSA_AES, CRYPT_VERIFYCONTEXT ) ) - { - g_critAESProv.Leave(); return ErrOSErrFromWin32Err(GetLastError()); } - const VOID *hPrev = AtomicExchangePointer( (VOID **)&g_hAESProv, (VOID *)hAESProv ); - Assert( hPrev == NULL ); - 
g_critAESProv.Leave(); return JET_errSuccess; } +CInitOnce< ERR, decltype(&ErrOSIAESProviderInit) > g_AESInitOnce; +extern void OSBCryptEncryptionTerm(); + void OSEncryptionTerm() { @@ -239,6 +223,9 @@ OSEncryptionTerm() CryptReleaseContext( g_hAESProv, 0 ); g_hAESProv = NULL; } + g_AESInitOnce.Reset(); + + OSBCryptEncryptionTerm(); } #include @@ -250,7 +237,7 @@ struct AES256KEY BYTE pbKey[0]; }; -ERR ErrOSEncryptionVerifyKey( +ERR ErrOSCAPIEncryptionVerifyKey( _In_reads_bytes_(cbKey) const BYTE *pbKey, _In_ ULONG cbKey ) { @@ -267,7 +254,7 @@ ERR ErrOSEncryptionVerifyKey( } ERR -ErrOSCreateAes256Key( +ErrOSCAPICreateAes256Key( _Out_writes_bytes_to_opt_(*pcbKeySize, *pcbKeySize) BYTE *pbKey, _Inout_ ULONG *pcbKeySize ) { @@ -276,7 +263,7 @@ ErrOSCreateAes256Key( AES256KEY *pKey = (AES256KEY *)pbKey; ULONG cbKeySize = *pcbKeySize; - CallR( ErrOSIAESProviderInit() ); + CallR( g_AESInitOnce.Init( ErrOSIAESProviderInit )); if ( !CryptGenKey( g_hAESProv, CALG_AES_256, CRYPT_EXPORTABLE, &hKey ) ) { @@ -326,7 +313,7 @@ ErrOSCreateAes256Key( Expected( blockSize == BlockSizeAes256*8 ); } - CallS( ErrOSEncryptionVerifyKey( pbKey, *pcbKeySize ) ); + CallS( ErrOSCAPIEncryptionVerifyKey( pbKey, *pcbKeySize ) ); #endif HandleError: @@ -352,7 +339,7 @@ ULONG CbOSEncryptAes256SizeNeeded( ULONG cbDataLen ) } ERR -ErrOSEncryptWithAes256( +ErrOSCAPIEncryptWithAes256( _Inout_updates_bytes_to_(cbDataBufLen, *pcbDataLen) BYTE *pbData, _Inout_ ULONG *pcbDataLen, _In_ ULONG cbDataBufLen, @@ -366,9 +353,9 @@ ErrOSEncryptWithAes256( AES256BLOBTRAILER trailer; ULONG cbNeeded; - CallR( ErrOSIAESProviderInit() ); + CallR( g_AESInitOnce.Init( ErrOSIAESProviderInit )); - CallR( ErrOSEncryptionVerifyKey( pbKey, cbKey ) ); + CallR( ErrOSCAPIEncryptionVerifyKey( pbKey, cbKey ) ); cbNeeded = CbOSEncryptAes256SizeNeeded( *pcbDataLen ); if ( cbNeeded > cbDataBufLen ) @@ -425,7 +412,7 @@ ErrOSEncryptWithAes256( } ERR -ErrOSDecryptWithAes256( +ErrOSCAPIDecryptWithAes256( _In_reads_( *pcbDataLen ) BYTE 
*pbDataIn, _Out_writes_bytes_to_(*pcbDataLen, *pcbDataLen) BYTE *pbDataOut, _Inout_ ULONG *pcbDataLen, @@ -438,9 +425,9 @@ ErrOSDecryptWithAes256( ULONG checksum; AES256BLOBTRAILER *ptrailer; - CallR( ErrOSIAESProviderInit() ); + CallR( g_AESInitOnce.Init( ErrOSIAESProviderInit )); - CallR( ErrOSEncryptionVerifyKey( pbKey, cbKey ) ); + CallR( ErrOSCAPIEncryptionVerifyKey( pbKey, cbKey ) ); if ( *pcbDataLen < BlockSizeAes256 + sizeof(AES256BLOBTRAILER) || *pcbDataLen % BlockSizeAes256 != sizeof(AES256BLOBTRAILER) % BlockSizeAes256 ) @@ -498,3 +485,114 @@ ErrOSDecryptWithAes256( return err; } +ERR +ErrOSBCryptCreateAes256Key( + _Out_writes_bytes_to_opt_(*pcbKeySize, *pcbKeySize) BYTE *pbKey, + _Inout_ ULONG *pcbKeySize ); + +ERR +ErrOSBCryptEncryptionVerifyKey( + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ); + +ERR +ErrOSBCryptEncryptWithAes256( + _Inout_updates_bytes_to_(cbDataBufLen, *pcbDataLen) BYTE *pbData, + _Inout_ ULONG *pcbDataLen, + _In_ ULONG cbDataBufLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ); + +ERR +ErrOSBCryptDecryptWithAes256( + _In_reads_( *pcbDataLen ) BYTE *pbDataIn, + _Out_writes_bytes_to_(*pcbDataLen, *pcbDataLen) BYTE *pbDataOut, + _Inout_ ULONG *pcbDataLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ); + +ERR +ErrOSCreateAes256Key( + _In_ AES256_IMPLEMENTATION impl, + _Out_writes_bytes_to_opt_(*pcbKeySize, *pcbKeySize) BYTE *pbKey, + _Inout_ ULONG *pcbKeySize ) +{ + switch( impl ) + { + case AES256_CAPI_IMPLEMENTATION: + return ErrOSCAPICreateAes256Key( pbKey, pcbKeySize ); + + case AES256_CNG_IMPLEMENTATION: + return ErrOSBCryptCreateAes256Key( pbKey, pcbKeySize ); + + default: + Assert( fFalse ); + return ErrERRCheck( JET_wrnNyi ); + } +} + +ERR ErrOSEncryptionVerifyKey( + _In_ AES256_IMPLEMENTATION impl, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ) +{ + switch( impl ) + { + case AES256_CAPI_IMPLEMENTATION: + return ErrOSCAPIEncryptionVerifyKey( pbKey, 
cbKey ); + + case AES256_CNG_IMPLEMENTATION: + return ErrOSBCryptEncryptionVerifyKey( pbKey, cbKey ); + + default: + Assert( fFalse ); + return ErrERRCheck( JET_wrnNyi ); + } +} + +ERR +ErrOSEncryptWithAes256( + _In_ AES256_IMPLEMENTATION impl, + _Inout_updates_bytes_to_(cbDataBufLen, *pcbDataLen) BYTE *pbData, + _Inout_ ULONG *pcbDataLen, + _In_ ULONG cbDataBufLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ) +{ + switch( impl ) + { + case AES256_CAPI_IMPLEMENTATION: + return ErrOSCAPIEncryptWithAes256( pbData, pcbDataLen, cbDataBufLen, pbKey, cbKey ); + + case AES256_CNG_IMPLEMENTATION: + return ErrOSBCryptEncryptWithAes256( pbData, pcbDataLen, cbDataBufLen, pbKey, cbKey ); + + default: + Assert( fFalse ); + return ErrERRCheck( JET_wrnNyi ); + } +} + +ERR +ErrOSDecryptWithAes256( + _In_ AES256_IMPLEMENTATION impl, + _In_reads_( *pcbDataLen ) BYTE *pbDataIn, + _Out_writes_bytes_to_(*pcbDataLen, *pcbDataLen) BYTE *pbDataOut, + _Inout_ ULONG *pcbDataLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ) +{ + switch( impl ) + { + case AES256_CAPI_IMPLEMENTATION: + return ErrOSCAPIDecryptWithAes256( pbDataIn, pbDataOut, pcbDataLen, pbKey, cbKey ); + + case AES256_CNG_IMPLEMENTATION: + return ErrOSBCryptDecryptWithAes256( pbDataIn, pbDataOut, pcbDataLen, pbKey, cbKey ); + + default: + Assert( fFalse ); + return ErrERRCheck( JET_wrnNyi ); + } +} + diff --git a/dev/ese/src/os/encrypt2.cxx b/dev/ese/src/os/encrypt2.cxx new file mode 100644 index 00000000..ad7e02b1 --- /dev/null +++ b/dev/ese/src/os/encrypt2.cxx @@ -0,0 +1,368 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "osstd.hxx" +#include +// Need to include CAPI header for some CAPI constants/structs to maintain compatibility with CAPI implementation. 
+#include +#include + +BCRYPT_ALG_HANDLE g_hBCryptAesAlg = NULL; +ULONG g_cbKeyObject = 0; + +// PERSISTED +#define BLOCK_SIZE_AES256 16 +#define KEY_SIZE_AES256 32 + +static NTOSFuncError( g_pfnRtlNtStatusToDosError, g_mwszzNtdllLibs, RtlNtStatusToDosError, oslfExpectedOnWin5x ); + +// Don't want to add bcrypt.lib in 200 different vcxproj's +static NTOSFuncNtStd( g_pfnBCryptOpenAlgorithmProvider, g_mwszzBCryptLib, BCryptOpenAlgorithmProvider, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptCloseAlgorithmProvider, g_mwszzBCryptLib, BCryptCloseAlgorithmProvider, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptGetProperty, g_mwszzBCryptLib, BCryptGetProperty, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptSetProperty, g_mwszzBCryptLib, BCryptSetProperty, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptGenRandom, g_mwszzBCryptLib, BCryptGenRandom, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptGenerateSymmetricKey, g_mwszzBCryptLib, BCryptGenerateSymmetricKey, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptDestroyKey, g_mwszzBCryptLib, BCryptDestroyKey, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptEncrypt, g_mwszzBCryptLib, BCryptEncrypt, oslfExpectedOnWin7 ); +static NTOSFuncNtStd( g_pfnBCryptDecrypt, g_mwszzBCryptLib, BCryptDecrypt, oslfExpectedOnWin7 ); + +ERR +ErrOSIBCryptAESProviderInit() +{ + ERR err = JET_errSuccess; + NTSTATUS status; + + if ( !NT_SUCCESS( status = g_pfnBCryptOpenAlgorithmProvider( &g_hBCryptAesAlg, BCRYPT_AES_ALGORITHM, NULL, 0 ))) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ))); + } + + if ( !NT_SUCCESS( status = g_pfnBCryptSetProperty( + g_hBCryptAesAlg, + BCRYPT_CHAINING_MODE, + (PBYTE)BCRYPT_CHAIN_MODE_CBC, + sizeof(BCRYPT_CHAIN_MODE_CBC), + 0 ))) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ))); + } + + ULONG cbData = 0; + if( !NT_SUCCESS( status = g_pfnBCryptGetProperty( + g_hBCryptAesAlg, + BCRYPT_OBJECT_LENGTH, + 
(PBYTE)&g_cbKeyObject, + sizeof(ULONG), + &cbData, + 0 ))) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ))); + } + if ( cbData != sizeof( g_cbKeyObject ) ) + { + Error( JET_errInvalidParameter ); + } + + ULONG cbBlockSize = 0; + if( !NT_SUCCESS( status = g_pfnBCryptGetProperty( + g_hBCryptAesAlg, + BCRYPT_BLOCK_LENGTH, + (PBYTE)&cbBlockSize, + sizeof(ULONG), + &cbData, + 0 ))) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ))); + } + Assert( cbBlockSize == BLOCK_SIZE_AES256 ); + if ( cbData != sizeof( cbBlockSize ) || cbBlockSize != BLOCK_SIZE_AES256 ) + { + Error( JET_errInvalidParameter ); + } + +HandleError: + return err; +} + +CInitOnce< ERR, decltype(&ErrOSIBCryptAESProviderInit) > g_BCryptInitOnce; + +void +OSBCryptEncryptionTerm() +{ + if ( g_hBCryptAesAlg != NULL ) + { + g_pfnBCryptCloseAlgorithmProvider( g_hBCryptAesAlg, 0 ); + g_hBCryptAesAlg = NULL; + } + g_BCryptInitOnce.Reset(); +} + +// Use an expanded structure that is identical to the CAPI exported key +#include +// PERSISTED +struct AES256KEYEXPANDED +{ + BYTE Version; + UnalignedLittleEndian Checksum; + PUBLICKEYSTRUC blobHeader; + UnalignedLittleEndian keySize; + BYTE pbKey[KEY_SIZE_AES256]; +}; + +ERR ErrOSBCryptEncryptionVerifyKey( + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ) +{ + AES256KEYEXPANDED *pKey = (AES256KEYEXPANDED *)pbKey; + + if ( cbKey != sizeof(AES256KEYEXPANDED) || + pKey->Version != JET_EncryptionAlgorithmAes256 || + pKey->blobHeader.bType != PLAINTEXTKEYBLOB || + pKey->blobHeader.bVersion != CUR_BLOB_VERSION || + pKey->blobHeader.reserved != 0 || + pKey->blobHeader.aiKeyAlg != CALG_AES_256 || + pKey->keySize != KEY_SIZE_AES256 || + pKey->Checksum != Crc32Checksum( (BYTE *)&pKey->blobHeader, sizeof( AES256KEYEXPANDED ) - OffsetOf( AES256KEYEXPANDED, blobHeader ) ) ) + { + return ErrERRCheck( JET_errInvalidParameter ); + } + + return JET_errSuccess; +} + +ERR +ErrOSBCryptCreateAes256Key( + 
_Out_writes_bytes_to_opt_(*pcbKeySize, *pcbKeySize) BYTE *pbKey, + _Inout_ ULONG *pcbKeySize ) +{ + ERR err = JET_errSuccess; + NTSTATUS status; + + if ( *pcbKeySize < sizeof( AES256KEYEXPANDED ) ) + { + *pcbKeySize = sizeof( AES256KEYEXPANDED ); + return ErrERRCheck( JET_errBufferTooSmall ); + } + + AES256KEYEXPANDED *pKey = (AES256KEYEXPANDED *)pbKey; + pKey->Version = JET_EncryptionAlgorithmAes256; + pKey->blobHeader.bType = PLAINTEXTKEYBLOB; + pKey->blobHeader.bVersion = CUR_BLOB_VERSION; + pKey->blobHeader.reserved = 0; + pKey->blobHeader.aiKeyAlg = CALG_AES_256; + pKey->keySize = KEY_SIZE_AES256; + if ( !NT_SUCCESS( status = g_pfnBCryptGenRandom( NULL, pKey->pbKey, sizeof( pKey->pbKey ), BCRYPT_USE_SYSTEM_PREFERRED_RNG ) ) ) + { + return ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status )); + } + pKey->Checksum = Crc32Checksum( (BYTE *)&pKey->blobHeader, sizeof( AES256KEYEXPANDED ) - OffsetOf( AES256KEYEXPANDED, blobHeader ) ); + + *pcbKeySize = sizeof( AES256KEYEXPANDED ); + +#ifdef DEBUG + CallS( ErrOSBCryptEncryptionVerifyKey( pbKey, *pcbKeySize ) ); +#endif + + return err; +} + +// PERSISTED +struct AES256BLOBTRAILER +{ + BYTE Version; + BYTE InitVector[BLOCK_SIZE_AES256]; +}; + +ERR +ErrOSBCryptEncryptWithAes256( + _Inout_updates_bytes_to_(cbDataBufLen, *pcbDataLen) BYTE *pbData, + _Inout_ ULONG *pcbDataLen, + _In_ ULONG cbDataBufLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ) +{ + ERR err = JET_errSuccess; + NTSTATUS status; + AES256KEYEXPANDED *pKey = (AES256KEYEXPANDED *)pbKey; + BCRYPT_KEY_HANDLE hKey = NULL; + ULONG checksum; + AES256BLOBTRAILER trailer; + ULONG cbNeeded; + BYTE InitVector[BLOCK_SIZE_AES256]; + BYTE *pbKeyObject; + + CallR( g_BCryptInitOnce.Init( ErrOSIBCryptAESProviderInit ) ); + + CallR( ErrOSBCryptEncryptionVerifyKey( pbKey, cbKey ) ); + + cbNeeded = CbOSEncryptAes256SizeNeeded( *pcbDataLen ); + if ( cbNeeded > cbDataBufLen ) + { + *pcbDataLen = cbNeeded; + return ErrERRCheck( JET_errBufferTooSmall 
); + } + + checksum = Crc32Checksum( pbData, *pcbDataLen ); + // checksum is appended to the plaintext + *(UnalignedLittleEndian *)(pbData + *pcbDataLen) = checksum; + *pcbDataLen += sizeof(checksum); + + pbKeyObject = (BYTE *)_alloca( g_cbKeyObject ); + if ( !NT_SUCCESS( status = g_pfnBCryptGenerateSymmetricKey( + g_hBCryptAesAlg, + &hKey, + pbKeyObject, + g_cbKeyObject, + pKey->pbKey, + sizeof( pKey->pbKey ), + 0 ))) + { + return ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status )); + } +#ifdef DEBUG + ULONG cbKeyLength, cbOut; + if( !NT_SUCCESS( status = g_pfnBCryptGetProperty( + hKey, + BCRYPT_KEY_LENGTH, + (PBYTE)&cbKeyLength, + sizeof(ULONG), + &cbOut, + 0 ))) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status )) ); + } + Assert( cbKeyLength == KEY_SIZE_AES256*8 ); +#endif + + if ( !NT_SUCCESS( status = g_pfnBCryptGenRandom( NULL, InitVector, sizeof( InitVector ), BCRYPT_USE_SYSTEM_PREFERRED_RNG ) ) ) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ))); + } + // Make copy of InitVector before calling into BCryptEncrypt as it is will modify the init-vector buffer + memcpy_s( trailer.InitVector, sizeof( trailer.InitVector ), InitVector, sizeof( InitVector )); + + if ( !NT_SUCCESS( status = g_pfnBCryptEncrypt( hKey, pbData, *pcbDataLen, NULL, InitVector, sizeof( InitVector ), pbData, cbDataBufLen, pcbDataLen, BCRYPT_BLOCK_PADDING ) ) ) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ))); + } + + // Version+InitVector is appended to the ciphertext + if ( *pcbDataLen + sizeof(trailer) > cbDataBufLen ) + { + Assert( fFalse ); // Messed up the calculation above? 
+ *pcbDataLen += sizeof(trailer); + Error( ErrERRCheck( JET_errBufferTooSmall ) ); + } + Assert( *pcbDataLen + sizeof(trailer) == cbNeeded ); + + trailer.Version = JET_EncryptionAlgorithmAes256; + memcpy_s( pbData + *pcbDataLen, cbDataBufLen - *pcbDataLen, &trailer, sizeof(trailer) ); + *pcbDataLen += sizeof(trailer); + +HandleError: + + Assert( err < JET_errSuccess || *pcbDataLen <= cbDataBufLen ); + + if ( hKey ) + { + g_pfnBCryptDestroyKey( hKey ); + } + + return err; +} + +ERR +ErrOSBCryptDecryptWithAes256( + _In_reads_( *pcbDataLen ) BYTE *pbDataIn, + _Out_writes_bytes_to_(*pcbDataLen, *pcbDataLen) BYTE *pbDataOut, + _Inout_ ULONG *pcbDataLen, + _In_reads_bytes_(cbKey) const BYTE *pbKey, + _In_ ULONG cbKey ) +{ + ERR err = JET_errSuccess; + NTSTATUS status; + AES256KEYEXPANDED *pKey = (AES256KEYEXPANDED *)pbKey; + BCRYPT_KEY_HANDLE hKey = NULL; + ULONG checksum; + AES256BLOBTRAILER *ptrailer; + BYTE InitVector[BLOCK_SIZE_AES256]; + BYTE *pbKeyObject; + + CallR( g_BCryptInitOnce.Init( ErrOSIBCryptAESProviderInit ) ); + + CallR( ErrOSBCryptEncryptionVerifyKey( pbKey, cbKey ) ); + + if ( *pcbDataLen < BLOCK_SIZE_AES256 + sizeof(AES256BLOBTRAILER) || + *pcbDataLen % BLOCK_SIZE_AES256 != sizeof(AES256BLOBTRAILER) % BLOCK_SIZE_AES256 ) + { + return ErrERRCheck( JET_errInvalidParameter ); + } + // Version+InitVector is appended to the ciphertext + ptrailer = (AES256BLOBTRAILER *)(pbDataIn + *pcbDataLen) - 1; + if ( ptrailer->Version != JET_EncryptionAlgorithmAes256 ) + { + return ErrERRCheck( JET_errInvalidParameter ); + } + // Make copy of InitVector before calling into BCryptDecrypt as it is will modify the init-vector buffer + memcpy_s( InitVector, sizeof( InitVector ), ptrailer->InitVector, sizeof( ptrailer->InitVector )); + *pcbDataLen -= sizeof(AES256BLOBTRAILER); + + pbKeyObject = (BYTE *)_alloca( g_cbKeyObject ); + if ( !NT_SUCCESS( status = g_pfnBCryptGenerateSymmetricKey( + g_hBCryptAesAlg, + &hKey, + pbKeyObject, + g_cbKeyObject, + pKey->pbKey, + sizeof( 
pKey->pbKey ), + 0 ))) + { + return ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status )); + } +#ifdef DEBUG + ULONG cbKeyLength, cbOut; + if( !NT_SUCCESS( status = g_pfnBCryptGetProperty( + hKey, + BCRYPT_KEY_LENGTH, + (PBYTE)&cbKeyLength, + sizeof(ULONG), + &cbOut, + 0 ))) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status )) ); + } + Assert( cbKeyLength == KEY_SIZE_AES256*8 ); +#endif + + // assert that the in/out buffers are not overlapping + Assert( pbDataOut < pbDataIn || pbDataOut >= ( pbDataIn + *pcbDataLen ) ); + Assert( pbDataIn < pbDataOut || pbDataIn >= ( pbDataOut + *pcbDataLen ) ); + + if ( !NT_SUCCESS( status = g_pfnBCryptDecrypt( hKey, pbDataIn, *pcbDataLen, NULL, InitVector, sizeof( InitVector ), pbDataOut, *pcbDataLen, pcbDataLen, BCRYPT_BLOCK_PADDING ) ) ) + { + Error( ErrOSErrFromWin32Err( g_pfnRtlNtStatusToDosError( status ), JET_errDecryptionFailed )); + } + if ( *pcbDataLen < sizeof(checksum) ) + { + Error( ErrERRCheck( JET_errDecryptionFailed ) ); + } + *pcbDataLen -= sizeof(checksum); + // checksum is appended to the plaintext + checksum = *(UnalignedLittleEndian *)(pbDataOut + *pcbDataLen); + + if ( checksum != Crc32Checksum( pbDataOut, *pcbDataLen ) ) + { + Error( ErrERRCheck( JET_errDecryptionFailed ) ); + } + +HandleError: + + if ( hKey ) + { + g_pfnBCryptDestroyKey( hKey ); + } + + return err; +} + diff --git a/dev/ese/src/os/library.cxx b/dev/ese/src/os/library.cxx index 387aa6dd..7ebc940c 100644 --- a/dev/ese/src/os/library.cxx +++ b/dev/ese/src/os/library.cxx @@ -150,7 +150,8 @@ VOID OSLibraryValidateLoaderPolicy( const WCHAR * const mwszzDlls, OSLoadFlags o mwszzDlls == g_mwszzProcessMemLibs || mwszzDlls == g_mwszzWow64Libs || mwszzDlls == g_mwszzEventLogLegacyLibs || - mwszzDlls == g_mwszzEventingProviderLibs + mwszzDlls == g_mwszzEventingProviderLibs || + mwszzDlls == g_mwszzBCryptLib ); // there are a couple reasons to add a library to the strict free required list diff --git a/dev/ese/src/os/osfile.cxx 
b/dev/ese/src/os/osfile.cxx index 5aca3413..f299b3a5 100644 --- a/dev/ese/src/os/osfile.cxx +++ b/dev/ese/src/os/osfile.cxx @@ -751,7 +751,7 @@ RtlNtStatusToDosError( _In_ NTSTATUS Status ); -static NTOSFuncNtStd( g_pfnRtlNtStatusToDosError, g_mwszzNtdllLibs, RtlNtStatusToDosError, oslfExpectedOnWin5x ); +static NTOSFuncError( g_pfnRtlNtStatusToDosError, g_mwszzNtdllLibs, RtlNtStatusToDosError, oslfExpectedOnWin5x ); ERR COSFile::ErrFlushFileBuffers( _In_ const IOFLUSHREASON iofr, _In_ const FileFlushMode ffm ) { diff --git a/dev/ese/src/os/osstd_.hxx b/dev/ese/src/os/osstd_.hxx index f3286be7..45345881 100644 --- a/dev/ese/src/os/osstd_.hxx +++ b/dev/ese/src/os/osstd_.hxx @@ -92,6 +92,7 @@ using namespace std; #define wszMinUser L"minuser32.dll" // is this on phone? #define wszKernel32Legacy L"kernel32legacy.dll" // win-phone +#define wszBCryptDll L"bcrypt.dll" // base libraries (API sets) @@ -176,6 +177,8 @@ const wchar_t * const g_mwszzAppModelStateLibs = szAppModelState L"\0"; const wchar_t * const g_mwszzEventingProviderLibs = wszEventingProvider L"\0" /* downlevel */ wszAdvapi32 L"\0"; const wchar_t * const g_mwszzEventLogLegacyLibs = wszEventLogLegacy L"\0" /* downlevel */ wszAdvapi32 L"\0"; +const wchar_t * const g_mwszzBCryptLib = wszBCryptDll L"\0"; // BCrypt/CNG APIs + const wchar_t * const g_mwszzKernel32CoreSystemBroken = wszKernel32 L"\0" /* medium level */ wszKernelBase L"\0" /* up-level legacy */ wszKernel32Legacy L"\0"; const wchar_t * const g_mwszzAdvapi32CoreSystemBroken = wszAdvapi32 L"\0"; @@ -196,7 +199,6 @@ const wchar_t * const g_mwszzAdvapi32CoreSystemBroken = wszAdvapi32 L"\0"; const INT rankCritTaskList = 0; -const INT rankAESProv = 1; const INT rankIoStats = 1; const INT rankIOREQ = 2; const INT rankTimerTaskList = 3; // Held only during TimerTask Schedule diff --git a/test/ese/src/devlibtest/oslayer/oslayerunit/encryptiontest.cxx b/test/ese/src/devlibtest/oslayer/oslayerunit/encryptiontest.cxx index cad104d9..5febf123 100644 --- 
a/test/ese/src/devlibtest/oslayer/oslayerunit/encryptiontest.cxx +++ b/test/ese/src/devlibtest/oslayer/oslayerunit/encryptiontest.cxx @@ -2,41 +2,42 @@ // Licensed under the MIT License. #include "osunitstd.hxx" -CUnitTest( Aes256Encryption, 0, "Test for AES256 Encryption" ); -ERR Aes256Encryption::ErrTest() +ERR Aes256EncryptionTest( BOOL fUseCAPI ) { + AES256_IMPLEMENTATION impl = fUseCAPI ? AES256_CAPI_IMPLEMENTATION : AES256_CNG_IMPLEMENTATION; + COSLayerPreInit oslayer; // FOSPreinit() JET_ERR err = JET_errSuccess; BYTE key[64]; ULONG keySize; - ULONG sizesToTry[] = { 1, 1024, 8120 }; + ULONG sizesToTry[] = { 1, 255, 1024, 8120 }; Call( ErrOSInit() ); // Try null buffer wprintf( L" Generate key: null buffer\n" ); keySize = 0; - OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSCreateAes256Key( NULL, &keySize ) ); + OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSCreateAes256Key( impl, NULL, &keySize ) ); OSTestCheck( keySize == 49 ); // Try too small key buffer wprintf( L" Generate key: small buffer\n" ); keySize = 4; - OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSCreateAes256Key( key, &keySize ) ); + OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSCreateAes256Key( impl, key, &keySize ) ); OSTestCheck( keySize == 49 ); keySize = 47; - OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSCreateAes256Key( key, &keySize ) ); + OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSCreateAes256Key( impl, key, &keySize ) ); OSTestCheck( keySize == 49 ); // Try big enough key buffer wprintf( L" Generate key: good buffer\n" ); keySize = 49; - OSTestCheckErr( ErrOSCreateAes256Key( key, &keySize ) ); + OSTestCheckErr( ErrOSCreateAes256Key( impl, key, &keySize ) ); OSTestCheck( keySize == 49 ); keySize = sizeof(key); - OSTestCheckErr( ErrOSCreateAes256Key( key, &keySize ) ); + OSTestCheckErr( ErrOSCreateAes256Key( impl, key, &keySize ) ); OSTestCheck( keySize == 49 ); wprintf( L" Test encryption/decryption\n" ); @@ -59,18 +60,29 @@ ERR 
Aes256Encryption::ErrTest() // Too small buffer wprintf( L" Too small buffer\n" ); - OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSEncryptWithAes256( pbData, &dataLength, cbNeeded - 1, key, keySize ) ); + OSTestCheckExpectedErr( JET_errBufferTooSmall, ErrOSEncryptWithAes256( impl, pbData, &dataLength, cbNeeded - 1, key, keySize ) ); OSTestCheck( dataLength == cbNeeded ); // Just right wprintf( L" Just Right buffer\n" ); dataLength = sizesToTry[i]; - OSTestCheckErr( ErrOSEncryptWithAes256( pbData, &dataLength, cbNeeded, key, keySize ) ); + OSTestCheckErr( ErrOSEncryptWithAes256( impl, pbData, &dataLength, cbNeeded, key, keySize ) ); OSTestCheck( dataLength == cbNeeded ); // decrypt wprintf( L" Decrypt\n" ); - OSTestCheckErr( ErrOSDecryptWithAes256( pbData, pbData2, &dataLength, key, keySize ) ); + OSTestCheckErr( ErrOSDecryptWithAes256( impl, pbData, pbData2, &dataLength, key, keySize ) ); + OSTestCheck( dataLength == sizesToTry[i] ); + for ( j=0; j Date: Wed, 19 Oct 2022 20:20:30 +0000 Subject: [PATCH 068/102] Raise corruption on hitting RBSFDPToBeDeleted based on variant [Substrate:d6f504e5203c9695602f0b225475cf40229aa19e] --- dev/ese/published/inc/jethdr.w | 2 +- dev/ese/src/ese/fucb.cxx | 2 +- dev/ese/src/ese/sysparamtable.g.cxx | 4 ++-- dev/ese/src/noncore/interop/params.h | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index 2d8878d2..9c081c11 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -4092,7 +4092,7 @@ typedef enum #define JET_paramFlight_ExtentPageCountCacheVerifyOnly 114 // Verify values read from the Extent Page Count Cache rather than just returning them. #define JET_paramFlight_EnablePgnoFDPLastSetTime 115 // whether we want to enable setting PgnoPFDSetTime in the system table for a table entry. 
#define JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo 116 // whether we want to check if FDP delete flag is set when we redo extent freed LR. -//#define JET_paramFlight_EnableExtentFreed2 117 // whether we want to enable logging ExtentFreed2 LR after the efv upgrade. +#define JET_paramFlight_RBSRaiseCorruptionOnRBSFDPToBeDeleted 117 // Whether we want to raise corruption event/failure item when we hit RBSFDPToBeDeleted error. #define JET_paramFlight_RBSLargeRevertableDeletePages 118 // Large revertable delete size for a table (in pages) beyond which we will track the deletes. #define JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull 119 // If set, we will do a revertable table delete even if NonRevertableTableDelete flag is passed provided NonRevertable delete is failing due to JET_errRBSDeleteTableTooSoon due to time not being set. Note: If JET_bitRevertableTableDeleteIfTooSoon is set, this variant is ignored. diff --git a/dev/ese/src/ese/fucb.cxx b/dev/ese/src/ese/fucb.cxx index 5a2ab2cd..375f8e05 100644 --- a/dev/ese/src/ese/fucb.cxx +++ b/dev/ese/src/ese/fucb.cxx @@ -373,7 +373,7 @@ VOID FUCBIllegalOperationFDPToBeDeleted( const OBJID objidFDP ) { // only report the error if not repairing - if ( !g_fRepair ) + if ( !g_fRepair && BoolParam( PinstFromPfucb( pfucb ), JET_paramFlight_RBSRaiseCorruptionOnRBSFDPToBeDeleted ) ) { OSTraceSuspendGC(); WCHAR wszTableName[JET_cbNameMost+1] = L""; diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index 60c4e679..6f0bd69e 100644 --- a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -130,7 +130,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramFlight_ExtentPageCountCacheVerifyOnly, CJetParam::typeBoolean, 1, 1, 0, 0, 0, -1, 0), NORMAL_PARAM(JET_paramFlight_EnablePgnoFDPLastSetTime, CJetParam::typeBoolean, 1, 0, 0, 1, 0, -1, fTrue), NORMAL_PARAM(JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo, CJetParam::typeBoolean, 1, 0, 0, 1, 
0, -1, fTrue), - ILLEGAL_PARAM(117), + NORMAL_PARAM(JET_paramFlight_RBSRaiseCorruptionOnRBSFDPToBeDeleted, CJetParam::typeBoolean, 1, 0, 0, 1, 0, -1, fTrue), NORMAL_PARAM(JET_paramFlight_RBSLargeRevertableDeletePages, CJetParam::typeInteger, 1, 0, 0, 0, 0, 2147483647, 0), NORMAL_PARAM(JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull, CJetParam::typeBoolean, 1, 0, 0, 0, 0, -1, fFalse), IGNORED_PARAM(JET_paramDBAPageAvailMin, CJetParam::typeInteger, 1, 1, 0, 0, 0, -1, 1280), @@ -363,7 +363,7 @@ static_assert( JET_paramAlternateDatabaseRecoveryPath == 113, "The order of defi static_assert( JET_paramFlight_ExtentPageCountCacheVerifyOnly == 114, "The order of defintion for JET_paramFlight_ExtentPageCountCacheVerifyOnly in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_EnablePgnoFDPLastSetTime == 115, "The order of defintion for JET_paramFlight_EnablePgnoFDPLastSetTime in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo == 116, "The order of defintion for JET_paramFlight_EnableFDPDeleteFlagCheckOnExtentFreedRedo in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( 117 == 117, "The order of defintion for 117 in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramFlight_RBSRaiseCorruptionOnRBSFDPToBeDeleted == 117, "The order of defintion for JET_paramFlight_RBSRaiseCorruptionOnRBSFDPToBeDeleted in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_RBSLargeRevertableDeletePages == 118, "The order of defintion for JET_paramFlight_RBSLargeRevertableDeletePages in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); static_assert( JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull == 119, "The order of defintion for JET_paramFlight_RBSRevertableDeleteIfTooSoonTimeNull in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramDBAPageAvailMin == 120, "The order of defintion for JET_paramDBAPageAvailMin in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); diff --git a/dev/ese/src/noncore/interop/params.h b/dev/ese/src/noncore/interop/params.h index 48e19520..7ae5ce74 100644 --- a/dev/ese/src/noncore/interop/params.h +++ b/dev/ese/src/noncore/interop/params.h @@ -183,6 +183,7 @@ MSINTERNAL enum class MJET_PARAM Flight_ExtentPageCountCacheVerifyOnly = 114, // Verify values read from the Extent Page Count Cache rather than just returning them. Flight_EnablePgnoFDPLastSetTime = 115, // whether we want to enable setting PgnoPFDSetTime in the system table for a table entry. Flight_EnableFDPDeleteFlagCheckOnExtentFreedRedo = 116, // whether we want to check if FDP delete flag is set when we redo extent freed LR. + Flight_RBSRaiseCorruptionOnRBSFDPToBeDeleted = 117, // Whether we want to raise corruption event/failure item when we hit RBSFDPToBeDeleted error. Flight_RBSLargeRevertableDeletePages = 118, // Large revertable delete size for a table (in pages) beyond which we will track the deletes. Flight_RBSRevertableDeleteIfTooSoonTimeNull = 119, // If set, we will do a revertable table delete even if NonRevertableTableDelete flag is passed provided NonRevertable delete is failing due to JET_errRBSDeleteTableTooSoon due to time not being set. Note: If JET_bitRevertableTableDeleteIfTooSoon is set, this variant is ignored. 
IndexTupleIncrement = 132, // for tuple indexes, offset increment for each succesive tuple From c2add905871525d7a656e4e371e5eec74477b931 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Thu, 20 Oct 2022 06:52:46 +0000 Subject: [PATCH 069/102] Do not use -501 (JET_errLogFileCorrupt) error for logical corruption in log Raise -1852 (JET_errLogCorrupted) instead. Also raise corruption failure item [Substrate:c3650d57e88b18a29edbf6e8b264f1ce21c57716] --- dev/ese/src/ese/_log/logredo.cxx | 13 +++++++------ dev/ese/src/ese/logdiff.cxx | 21 ++++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 0f8f0a51..793c36cd 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -3165,7 +3165,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) err = ErrNDValidateSetExternalHeader( csr.Cpage(), &data ); if ( err < JET_errSuccess ) { - OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"630fa9f1-afcd-4998-bb82-db992a6eb22f" ); + OSUHAEmitFailureTag( m_pinst, HaDbFailureTagLogLogicallyInconsistent, L"630fa9f1-afcd-4998-bb82-db992a6eb22f" ); Call( err ); } @@ -3555,7 +3555,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) // Assert( cb < sizeof( rgbRecNew ) ); if ( cb >= (SIZE_T)g_cbPage ) { - OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"dba4c055-bbbf-4fd1-a56d-e786519803eb" ); + OSUHAEmitFailureTag( m_pinst, HaDbFailureTagLogLogicallyInconsistent, L"dba4c055-bbbf-4fd1-a56d-e786519803eb" ); Error( ErrERRCheck( JET_errLogCorrupted ) ); } @@ -3570,7 +3570,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) Assert( (ULONG)data.Cb() == cbNewData ); if ( (ULONG)data.Cb() != cbNewData ) { - OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"a3cb57b9-8ba1-496d-a6fc-4fc2f0140fc4" ); + OSUHAEmitFailureTag( m_pinst, HaDbFailureTagLogLogicallyInconsistent, 
L"a3cb57b9-8ba1-496d-a6fc-4fc2f0140fc4" ); Error( ErrERRCheck( JET_errLogCorrupted ) ); } @@ -7733,8 +7733,8 @@ LOCAL ERR ErrLGIRedoSplitLineinfo( FUCB *pfucb, if ( psplit->clines < 0 || psplit->clines > 1000000 ) { - OSUHAEmitFailureTag( PinstFromPfucb( pfucb ), HaDbFailureTagRecoveryRedoLogCorruption, L"2dfb97c9-80ee-4438-ba68-0d4953cf09ad" ); - return ErrERRCheck( JET_errLogFileCorrupt ); + OSUHAEmitFailureTag( PinstFromPfucb( pfucb ), HaDbFailureTagLogLogicallyInconsistent, L"2dfb97c9-80ee-4438-ba68-0d4953cf09ad" ); + return ErrERRCheck( JET_errLogCorrupted ); } AllocR( psplit->rglineinfo = new LINEINFO[psplit->clines] ); @@ -10993,7 +10993,8 @@ ERR LOG::ErrLGRIRedoMacroOperation( PIB *ppib, DBTIME dbtime ) if ( plr == NULL ) { FireWall( "NullLrOnRedoMacro" ); - return ErrERRCheck( JET_errLogFileCorrupt ); + OSUHAEmitFailureTag( PinstFromPpib( ppib ), HaDbFailureTagLogLogicallyInconsistent, L"5ccd5865-becc-4c71-9eed-14756d0b0397" ); + return ErrERRCheck( JET_errLogCorrupted ); } LRTYP lrtyp = plr->lrtyp; diff --git a/dev/ese/src/ese/logdiff.cxx b/dev/ese/src/ese/logdiff.cxx index 75ad61d8..ca77d840 100644 --- a/dev/ese/src/ese/logdiff.cxx +++ b/dev/ese/src/ese/logdiff.cxx @@ -1178,13 +1178,15 @@ ERR ErrLGGetAfterImage( if ( ibOffsetOld < 0 ) { AssertSz( fFalse, "Buffer underrun detected in ErrLGGetAfterImage: ibOffsetOld < 0" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"6d68938e-f163-4a3a-b742-683e6bfbbef1" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } if ( pbOld + ibOffsetOld < pbOldCur ) { AssertSz( fFalse, "Buffer underrun detected in ErrLGGetAfterImage, pbOld + ibOffsetOld < pbOldCur" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"e6959992-a3c5-4849-9eea-49185cd840c9" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } cbSkip = pbOld + ibOffsetOld - pbOldCur; @@ -1192,7 
+1194,8 @@ ERR ErrLGGetAfterImage( (INT_PTR)cbSkip < 0 ) { AssertSz( fFalse, "Buffer corruption detected in ErrLGGetAfterImage" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"0d959afa-737b-4697-933d-8432d92b1ef0" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } UtilMemCpy( pbNewCur, pbOldCur, cbSkip ); @@ -1214,7 +1217,8 @@ ERR ErrLGGetAfterImage( cbDataNew < 0) { AssertSz( fFalse, "Stack corruption detected in ErrLGGetAfterImage after UtilMemCpy called" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"bc55b9ec-ad44-4821-be93-8513e3b6bcf4" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } if ( diffhdr2.FInsert() ) @@ -1252,12 +1256,14 @@ ERR ErrLGGetAfterImage( if ( pbDiffCur > pbDiffMax ) { AssertSz( fFalse, "Buffer overrun detected in ErrLGGetAfterImage, pbDiffCur > pbDiffMax" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"91c1d32d-3430-485e-b4c4-ed57145a8460" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } if ( pbOldCur > pbOld + cbOld ) { AssertSz( fFalse, "Buffer overrun detected in ErrLGGetAfterImage, pbOldCur > pbOld + cbOld" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"bee21012-3b13-4655-9918-7e1d30312bce" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } } @@ -1268,7 +1274,8 @@ ERR ErrLGGetAfterImage( if ( pbNewCur + cbT - pbNew > g_rgfmp[ ifmp ].CbPage() ) { AssertSz( fFalse, "Buffer overrun detected in ErrLGGetAfterImage: data generated is larger than a page" ); - Call( ErrERRCheck( JET_errLogFileCorrupt ) ); + OSUHAEmitFailureTag( PinstFromIfmp( ifmp ), HaDbFailureTagLogLogicallyInconsistent, L"b8e53971-65f2-492d-b0a6-07bd404ba4bd" ); + Call( ErrERRCheck( JET_errLogCorrupted ) ); } 
UtilMemCpy( pbNewCur, pbOldCur, cbT ); From 41c89941b867abd440fc7e100e5e960c26dab94b Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 20 Oct 2022 20:53:32 +0000 Subject: [PATCH 070/102] ESE Block Cache: Async IO Worker hang This change contains a rework of the state management of the cache's CRequest's IO. It improves the tracking of explicit state related to IO requests and completions. It also improves the contract with which these are used to perform IO in the multiple rounds we can do (request IO and finalization IO). This should improve debuggability and increase determinism. I don't think this was the root bug however. This change also contains a rework of the sync mechanism used to invoke async IO workers. After the previous change, I did catch a case where we clearly failed to signal the async IO worker properly. This could have been the cause of the hang in prod because a missed signal could leave that async IO work stranded indefinitely. The new scheme uses an atomic update of a single word to control requests to the async IO worker and when that worker needs to run. This scheme should be more reliable than the current scheme. It should also fix some performance issues that caused us to use too many worker pool threads. 
[Substrate:3f0564af4cee7dc8c7e2ad4213c79877f8a01069] --- .../src/os/blockcache/_hashedlrukcache.hxx | 322 +++++++++++------- .../_hashedlrukcachethreadlocalstorage.hxx | 97 ++++-- 2 files changed, 272 insertions(+), 147 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index a580dd07..4acfcb19 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -138,7 +138,12 @@ class THashedLRUKCache m_cCachingFileIO( 0 ), m_iorl( this ), m_pfnIORangeLockAcquired( NULL ), - m_keyIORangeLockAcquired( NULL ) + m_keyIORangeLockAcquired( NULL ), + m_fIORangeLockAcquired( fFalse ), + m_fIORequested( fFalse ), + m_pfnWaitForIOComplete( NULL ), + m_keyWaitForIOComplete( NULL ), + m_fWaitForIOCompleted( fFalse ) { m_ilRequestsByIO.InsertAsPrevMost( this ); } @@ -154,7 +159,7 @@ class THashedLRUKCache ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } typename CHashedLRUKCachedFileTableEntry::CIORangeLockBase* Piorl() { return &m_iorl; } - BOOL FIOCompleted() const { return m_msIO.FEmpty(); } + BOOL FWaitForIOCompleted() const { return !m_fIORequested || m_fWaitForIOCompleted; } BOOL FCacheMiss() const { return m_fCacheMiss; } BOOL FCacheHit() const { return m_fCacheHit; } @@ -198,9 +203,37 @@ class THashedLRUKCache void WaitForIO( _In_opt_ CRequest::PfnIOComplete pfnIOComplete = NULL, _In_opt_ const DWORD_PTR keyIOComplete = NULL ) { + Assert( !m_fWaitForIOCompleted ); + IssueIO(); - m_msIO.Partition( pfnIOComplete, keyIOComplete ); + if ( pfnIOComplete ) + { + m_pfnWaitForIOComplete = pfnIOComplete; + m_keyWaitForIOComplete = keyIOComplete; + + m_msIO.Partition( WaitForIOComplete_, DWORD_PTR( this ) ); + } + else + { + m_msIO.Partition(); + WaitForIOComplete(); + } + } + + void ResetWaitForIO() + { + Assert( FWaitForIOCompleted() ); + + if ( m_msIO.GroupActive() != 0 ) + { + m_msIO.Partition(); + } + + m_pfnWaitForIOComplete = 
NULL; + m_keyWaitForIOComplete = NULL; + m_fIORequested = fFalse; + m_fWaitForIOCompleted = fFalse; } ERR ErrWriteCluster( _In_ ICachedBlockSlab* const pcbs, @@ -211,6 +244,8 @@ class THashedLRUKCache ERR err = JET_errSuccess; const CClusterWriteCompletionContext* pcwcc = PcwccGetClusterCompletionContext(); + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Cluster %s", OSFormatFileId( Pc() ), @@ -244,6 +279,8 @@ class THashedLRUKCache { ERR err = JET_errSuccess; + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Cluster %s", OSFormatFileId( Pc() ), @@ -270,6 +307,8 @@ class THashedLRUKCache ERR err = JET_errSuccess; BYTE* const pbData = (BYTE*)PbData() + ibOffset - Offsets().IbStart(); + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Block ib=%llu cb=%u", OSFormatFileId( Pc() ), @@ -292,6 +331,8 @@ class THashedLRUKCache ERR err = JET_errSuccess; const BYTE* const pbData = PbData() + ibOffset - Offsets().IbStart(); + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Block ib=%llu cb=%u", OSFormatFileId( Pc() ), @@ -368,12 +409,9 @@ class THashedLRUKCache void Start() override { - THashedLRUKCacheBase::CRequest::Start(); + m_fIORequested = fTrue; - if ( m_msIO.GroupActive() != 0 ) - { - m_msIO.Partition(); - } + THashedLRUKCacheBase::CRequest::Start(); const CMeteredSection::Group group = m_msIO.Enter(); Assert( group == 0 ); @@ -473,6 +511,8 @@ class THashedLRUKCache { m_prequest->m_pfnIORangeLockAcquired( m_prequest->m_keyIORangeLockAcquired ); } + + m_prequest->m_fIORangeLockAcquired = fTrue; } private: @@ -556,6 +596,25 @@ class THashedLRUKCache } } + void WaitForIOComplete() + { + m_fWaitForIOCompleted = fTrue; + + if ( m_pfnWaitForIOComplete ) + { + m_pfnWaitForIOComplete( 
m_keyWaitForIOComplete ); + } + } + + static void WaitForIOComplete_( _In_ const DWORD_PTR keyWaitForIOComplete ) + { + const CMeteredSection::PFNPARTITIONCOMPLETE pfnPartitionComplete = WaitForIOComplete_; + Unused( pfnPartitionComplete ); + + CRequest* const prequest = (CRequest*)keyWaitForIOComplete; + prequest->WaitForIOComplete(); + } + private: const CClusterWriteCompletionContext m_rgcwcc[ 2 ]; @@ -571,6 +630,11 @@ class THashedLRUKCache CIORangeLock m_iorl; CRequest::PfnIORangeLockAcquired m_pfnIORangeLockAcquired; DWORD_PTR m_keyIORangeLockAcquired; + BOOL m_fIORangeLockAcquired; + BOOL m_fIORequested; + CRequest::PfnIOComplete m_pfnWaitForIOComplete; + DWORD_PTR m_keyWaitForIOComplete; + BOOL m_fWaitForIOCompleted; }; // Update Slab Visitor @@ -4817,6 +4881,7 @@ class THashedLRUKCache _In_ CRequest* const prequestIO ); void WaitForPendingIO( _In_ CRequest* const prequestIO ); BOOL FCompletedIO( _In_ CRequest* const prequestIO ); + void ClearIOCompletion( _In_ CRequest* const prequestIO ); void RequestFinalizeIO( _In_ CRequest* const prequestIO ); void RequestIO( _In_ CRequest* const prequestIO, @@ -8283,168 +8348,171 @@ void THashedLRUKCache::Issue() template void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls ) { - BOOL fIORangeLockFailure = fFalse; - CRequest* prequestIONext = NULL; - pctls->BeginAsyncIOWorker(); - // for each issued IO, request an IO range lock in terms of the cached file. these IO range locks not only - // protect against chaotic concurrent IO to overlapping offsets but they also serialize all activity for that - // offset range including things like write back or moving cached blocks in the caching file + while ( !pctls->FTryEndAsyncIOWorker() ) + { + BOOL fIORangeLockFailure = fFalse; + CRequest* prequestIONext = NULL; + + // for each issued IO, request an IO range lock in terms of the cached file. 
these IO range locks not only + // protect against chaotic concurrent IO to overlapping offsets but they also serialize all activity for that + // offset range including things like write back or moving cached blocks in the caching file - pctls->CritAsyncIOWorkerState().Enter(); - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); + pctls->CritAsyncIOWorkerState().Enter(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIOIssued().Next( prequestIO ); - - if ( FWaitForIORangeLock( prequestIO, pctls ) ) - { - pctls->IlIOIssued().Remove( prequestIO ); - pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); - } - else { - fIORangeLockFailure = fTrue; + prequestIONext = pctls->IlIOIssued().Next( prequestIO ); + + if ( FWaitForIORangeLock( prequestIO, pctls ) ) + { + pctls->IlIOIssued().Remove( prequestIO ); + pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); + } + else + { + fIORangeLockFailure = fTrue; + } } - } - pctls->CritAsyncIOWorkerState().Leave(); + pctls->CritAsyncIOWorkerState().Leave(); - // determine which requested IO range locks have been acquired + // determine which requested IO range locks have been acquired - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIORangeLockPending().Next( prequestIO ); - - if ( prequestIO->Piorl()->FLocked() ) { - pctls->IlIORangeLockPending().Remove( prequestIO ); - pctls->IlIORangeLocked().InsertAsNextMost( prequestIO ); + prequestIONext = pctls->IlIORangeLockPending().Next( prequestIO ); + + if ( prequestIO->Piorl()->FLocked() ) + { + pctls->IlIORangeLockPending().Remove( prequestIO ); + pctls->IlIORangeLocked().InsertAsNextMost( prequestIO 
); + } } - } - // for each locked IO, request IO against the cached file and then the caching file. we do this to maximize - // our chances of IO optimization by the underlying file system implementation - // - // NOTE: RequestCachedFileIO / RequestCachingFileIO is touching slabs twice + // for each locked IO, request IO against the cached file and then the caching file. we do this to maximize + // our chances of IO optimization by the underlying file system implementation + // + // NOTE: RequestCachedFileIO / RequestCachingFileIO is touching slabs twice - while ( CRequest* prequestIO = pctls->IlIORangeLocked().PrevMost() ) - { - RequestCachedFileIO( prequestIO ); + while ( CRequest* prequestIO = pctls->IlIORangeLocked().PrevMost() ) + { + RequestCachedFileIO( prequestIO ); - pctls->IlIORangeLocked().Remove( prequestIO ); - pctls->IlCachedFileIORequested().InsertAsNextMost( prequestIO ); - } + pctls->IlIORangeLocked().Remove( prequestIO ); + pctls->IlCachedFileIORequested().InsertAsNextMost( prequestIO ); + } - while ( CRequest* prequestIO = pctls->IlCachedFileIORequested().PrevMost() ) - { - RequestCachingFileIO( prequestIO ); + while ( CRequest* prequestIO = pctls->IlCachedFileIORequested().PrevMost() ) + { + RequestCachingFileIO( prequestIO ); - pctls->IlCachedFileIORequested().Remove( prequestIO ); - pctls->IlCachingFileIORequested().InsertAsNextMost( prequestIO ); - } + pctls->IlCachedFileIORequested().Remove( prequestIO ); + pctls->IlCachingFileIORequested().InsertAsNextMost( prequestIO ); + } - // wait for all IO issued so far but asynchronously + // wait for all IO issued so far but asynchronously - while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) - { - WaitForPendingIOAsync( pctls, prequestIO ); + while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) + { + WaitForPendingIOAsync( pctls, prequestIO ); - pctls->IlCachingFileIORequested().Remove( prequestIO ); - pctls->IlIOPending().InsertAsNextMost( 
prequestIO ); - } + pctls->IlCachingFileIORequested().Remove( prequestIO ); + pctls->IlIOPending().InsertAsNextMost( prequestIO ); + } - // for each pending IO, check for any IOs that are complete + // for each pending IO, check for any IOs that are complete - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIOPending().PrevMost(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIOPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIOPending().Next( prequestIO ); - - if ( FCompletedIO( prequestIO ) ) { - pctls->IlIOPending().Remove( prequestIO ); - pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); + prequestIONext = pctls->IlIOPending().Next( prequestIO ); + + if ( FCompletedIO( prequestIO ) ) + { + pctls->IlIOPending().Remove( prequestIO ); + pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); + + ClearIOCompletion( prequestIO ); + } } - } - // for each completed IO, finalize the IO in the cache + // for each completed IO, finalize the IO in the cache - while ( CRequest* prequestIO = pctls->IlIOCompleted().PrevMost() ) - { - RequestFinalizeIO( prequestIO ); + while ( CRequest* prequestIO = pctls->IlIOCompleted().PrevMost() ) + { + RequestFinalizeIO( prequestIO ); - pctls->IlIOCompleted().Remove( prequestIO ); - pctls->IlFinalizeIORequested().InsertAsNextMost( prequestIO ); - } + pctls->IlIOCompleted().Remove( prequestIO ); + pctls->IlFinalizeIORequested().InsertAsNextMost( prequestIO ); + } - // wait for all IO issued so far but asynchronously + // wait for all IO issued so far but asynchronously - while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) - { - WaitForPendingIOAsync( pctls, prequestIO ); + while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) + { + WaitForPendingIOAsync( pctls, prequestIO ); - pctls->IlFinalizeIORequested().Remove( prequestIO ); - pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); - } + 
pctls->IlFinalizeIORequested().Remove( prequestIO ); + pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); + } - // for each pending finalize IO, check for any IOs that are complete + // for each pending finalize IO, check for any IOs that are complete - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlFinalizeIOPending().PrevMost(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlFinalizeIOPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); - - if ( FCompletedIO( prequestIO ) ) { - pctls->IlFinalizeIOPending().Remove( prequestIO ); - pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); - } - } - - // for each finalized IO, release the IO range locks and trigger the IO completion + prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); - while ( CRequest* prequestIO = pctls->IlFinalizeIOCompleted().PrevMost() ) - { - pctls->IlFinalizeIOCompleted().Remove( prequestIO ); + if ( FCompletedIO( prequestIO ) ) + { + pctls->IlFinalizeIOPending().Remove( prequestIO ); + pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); + } + } - ReleaseIORangeLock( prequestIO, pctls ); + // for each finalized IO, release the IO range locks and trigger the IO completion - CRequest* prequestNext = NULL; - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestNext ) + while ( CRequest* prequestIO = pctls->IlFinalizeIOCompleted().PrevMost() ) { - prequestNext = prequestIO->IlRequestsByIO().Next( prequest ); + pctls->IlFinalizeIOCompleted().Remove( prequestIO ); - prequestIO->IlRequestsByIO().Remove( prequest ); + ReleaseIORangeLock( prequestIO, pctls ); - if ( prequest != prequestIO ) + CRequest* prequestNext = NULL; + for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); + prequest; + prequest = prequestNext ) { - pctls->RemoveRequest( prequest ); + prequestNext = 
prequestIO->IlRequestsByIO().Next( prequest ); + + prequestIO->IlRequestsByIO().Remove( prequest ); + + if ( prequest != prequestIO ) + { + pctls->RemoveRequest( prequest ); + } } - } - pctls->RemoveRequest( prequestIO ); - } + pctls->RemoveRequest( prequestIO ); + } - // if we failed to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again + // if we failed to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again - if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) - { - pctls->CueAsyncIOWorker(); + if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) + { + pctls->CueAsyncIOWorker(); + } } - - pctls->EndAsyncIOWorker(); } template @@ -8466,6 +8534,7 @@ ERR THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // wait for all the IO to complete even if it is not needed for finalization WaitForPendingIO( prequest ); + ClearIOCompletion( prequest ); // finalize the IO in the cache @@ -8698,7 +8767,7 @@ BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) prequest; prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) { - if ( !prequest->FIOCompleted() ) + if ( !prequest->FWaitForIOCompleted() ) { return fFalse; } @@ -8707,6 +8776,17 @@ BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) return fTrue; } +template +void THashedLRUKCache::ClearIOCompletion( _In_ CRequest* const prequestIO ) +{ + for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); + prequest; + prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) + { + prequest->ResetWaitForIO(); + } +} + template void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx index 4a2a1b14..62a0d80b 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx +++ 
b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx @@ -17,8 +17,7 @@ class CHashedLRUKCacheThreadLocalStorage // ctls : CCacheThreadLocalStorageBase( ctid ), m_pc( NULL ), m_ptpwIssue( NULL ), - m_semAsyncIOWorkerRequest( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_semAsyncIOWorkerRequest" ) ), - m_semAsyncIOWorkerExecute( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_semAsyncIOWorkerExecute" ) ), + m_cwAsyncIOWorkerState( ControlWord::cwNone ), m_ctidAsyncIOWorker( ctidInvalid ), m_critAsyncIOWorkerState( CLockBasicInfo( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_critAsyncIOWorkerState" ), rankIssued, 0 ) ), m_rgibSlab { 0 }, @@ -27,9 +26,6 @@ class CHashedLRUKCacheThreadLocalStorage // ctls m_cIORangeLocked( 0 ), m_cbIORangeLocked( 0 ) { - m_semAsyncIOWorkerRequest.Release(); - m_semAsyncIOWorkerRequest.Release(); - m_semAsyncIOWorkerExecute.Release(); } void Initialize( _In_ THashedLRUKCache* const pc, _Inout_ TP_WORK** const pptpwIssue ) @@ -186,36 +182,73 @@ class CHashedLRUKCacheThreadLocalStorage // ctls void BeginAsyncIOWorker() { - // serialize execution of the async IO worker because more than one can be requested and executing concurrently - - m_semAsyncIOWorkerExecute.Acquire(); m_ctidAsyncIOWorker = CtidCurrentThread(); + + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s BeginAsyncIOWorker .%x", + OSFormatFileId( Pc() ), + Ctid() ) ); } - void EndAsyncIOWorker() + BOOL FTryEndAsyncIOWorker() { - // enable another async IO worker task to execute + BOOL fNewRequest = fFalse; - m_ctidAsyncIOWorker = ctidInvalid; - m_semAsyncIOWorkerExecute.Release(); + // if the worker is requested and running then clear the requested state + // if the worker is not requested and running then clear the running state - // allow another async IO worker request to be made + OSSYNC_FOREVER + { + const ControlWord cwAsyncIOWorkerStateBIExpected = (ControlWord)AtomicRead( (LONG*)&m_cwAsyncIOWorkerState ); + const 
ControlWord cwAsyncIOWorkerStateAI = cwAsyncIOWorkerStateBIExpected == ControlWord::cwRunning ? + ControlWord::cwNone : + ControlWord::cwRunning; + const ControlWord cwAsyncIOWorkerStateBI = (ControlWord)AtomicCompareExchange( (LONG*)&m_cwAsyncIOWorkerState, + (LONG)cwAsyncIOWorkerStateBIExpected, + (LONG)cwAsyncIOWorkerStateAI ); + + if ( cwAsyncIOWorkerStateBI == cwAsyncIOWorkerStateBIExpected ) + { + fNewRequest = cwAsyncIOWorkerStateBI == ControlWord::cwRequestedAndRunning; + break; + } + } - m_semAsyncIOWorkerRequest.Release(); + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s FTryEndAsyncIOWorker .%x = %s", + OSFormatFileId( Pc() ), + Ctid(), + OSFormatBoolean( fNewRequest ) ) ); - // release the ref count for this request + // if we are no longer running then release the ref count for the async IO worker - CHashedLRUKCacheThreadLocalStorage* pctlsT = this; - Release( &pctlsT ); + if ( !fNewRequest ) + { + CHashedLRUKCacheThreadLocalStorage* pctlsT = this; + Release( &pctlsT ); + } + + // we have succeeded in ending if there is no new request + + return !fNewRequest; } void CueAsyncIOWorker() { - // try to get a token to request the async IO worker + // request the async IO worker + + const ControlWord cwAsyncIOWorkerStateBI = (ControlWord)AtomicExchange( (LONG*)&m_cwAsyncIOWorkerState, + (LONG)ControlWord::cwRequestedAndRunning ); + + const BOOL fNewRequest = ( cwAsyncIOWorkerStateBI == ControlWord::cwNone || + cwAsyncIOWorkerStateBI == ControlWord::cwRunning ); + const BOOL fSignalNeeded = cwAsyncIOWorkerStateBI == ControlWord::cwNone; - if ( m_semAsyncIOWorkerRequest.FTryAcquire() ) + // signal the async IO worker if necessary + + if ( fSignalNeeded ) { - // add a ref count for this request + // add a ref count for the async IO worker AddRef(); @@ -223,6 +256,13 @@ class CHashedLRUKCacheThreadLocalStorage // ctls SubmitThreadpoolWork( PtpwIssue() ); } + + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s CueAsyncIOWorker .%x 
fNewRequest = %s fSignalNeeded = %s", + OSFormatFileId( Pc() ), + Ctid(), + OSFormatBoolean( fNewRequest ), + OSFormatBoolean( fSignalNeeded ) ) ); } static void CueAsyncIOWorker_( _In_ const DWORD_PTR keyIOComplete ) @@ -241,21 +281,26 @@ class CHashedLRUKCacheThreadLocalStorage // ctls ~CHashedLRUKCacheThreadLocalStorage() { - m_semAsyncIOWorkerRequest.Acquire(); - m_semAsyncIOWorkerRequest.Acquire(); - m_semAsyncIOWorkerExecute.Acquire(); - m_pc->ReleaseThreadpoolState( &m_ptpwIssue ); } + private: + + enum class ControlWord : LONG + { + cwNone = 0, + cwRequested = 1, + cwRunning = 2, + cwRequestedAndRunning = 3, + }; + private: THashedLRUKCache* m_pc; TP_WORK* m_ptpwIssue; CCountedInvasiveList m_ilIORequested; - CSemaphore m_semAsyncIOWorkerRequest; - CSemaphore m_semAsyncIOWorkerExecute; + volatile ControlWord m_cwAsyncIOWorkerState; CacheThreadId m_ctidAsyncIOWorker; CCountedInvasiveList m_ilIORangeLockPending; CCountedInvasiveList m_ilIORangeLocked; From 628b215ca8451b7131b3e8d1e3c194b3838d1a7a Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Fri, 21 Oct 2022 15:57:23 +0000 Subject: [PATCH 071/102] Variant config setting to use new CNG AES256 implementation [Substrate:51d0450379b234e8fc4379dd5f551e97851a7b0f] --- dev/ese/published/inc/jethdr.w | 6 +++++- dev/ese/src/ese/_osu/encryptu.cxx | 28 ++++++++++++++++++++++++++-- dev/ese/src/ese/info.cxx | 8 ++++++-- dev/ese/src/ese/jetapi.cxx | 12 +++++++++++- dev/ese/src/ese/sysparamtable.g.cxx | 4 +++- dev/ese/src/inc/daedef.hxx | 3 +++ dev/ese/src/noncore/interop/params.h | 3 ++- 7 files changed, 56 insertions(+), 8 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index 9c081c11..bcb174e1 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -4251,7 +4251,11 @@ typedef enum #define JET_paramEnableBlockCacheDetach 220 // Indicates that ESE Block Cache detach is enabled. This will allow a file cached by the ESE Block Cache to be detached on open. 
-#define JET_paramMaxValueInvalid 221 // This is not a valid parameter. It can change from release to release! +// end_PubEsent +#define JET_paramFlight_UseCngAes256Implementation 221 // Whether to use the CNG based implementation (rather than CAPI based one) for AES256 encryption +// begin_PubEsent + +#define JET_paramMaxValueInvalid 222 // This is not a valid parameter. It can change from release to release! // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) diff --git a/dev/ese/src/ese/_osu/encryptu.cxx b/dev/ese/src/ese/_osu/encryptu.cxx index 731059b0..66e9d385 100644 --- a/dev/ese/src/ese/_osu/encryptu.cxx +++ b/dev/ese/src/ese/_osu/encryptu.cxx @@ -75,7 +75,31 @@ ErrOSUEncrypt( PERFOpt( cEncryptionCalls.Inc( iInstance, tce ) ); const HRT hrtStart = HrtHRTCount(); - const ERR err = ErrOSEncryptWithAes256( AES256_CAPI_IMPLEMENTATION, pbData, pcbDataLen, cbDataBufLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); +#ifdef DEBUG + ULONG cbDataIn = *pcbDataLen; + BYTE *pbDataCopy = new BYTE[cbDataIn]; + if ( pbDataCopy != NULL ) + { + memcpy_s( pbDataCopy, cbDataIn, pbData, cbDataIn ); + } +#endif + const ERR err = ErrOSEncryptWithAes256( PARAM_AES256_IMPLEMENTATION, pbData, pcbDataLen, cbDataBufLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); +#ifdef DEBUG + // On debug, verify that other encryption implementation can decrypt the result back to the original buffer. 
+ if ( err >= JET_errSuccess && pbDataCopy != NULL ) + { + ULONG cbDataOut = *pcbDataLen; + BYTE *pbDataOutCopy = new BYTE[cbDataOut]; + if ( pbDataOutCopy != NULL ) + { + CallS( ErrOSDecryptWithAes256( OTHER_AES256_IMPLEMENTATION, pbData, pbDataOutCopy, &cbDataOut, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ) ); + Assert( cbDataIn == cbDataOut ); + Assert( memcmp( pbDataCopy, pbDataOutCopy, cbDataIn ) == 0 ); + delete[] pbDataOutCopy; + } + } + delete[] pbDataCopy; +#endif PERFOpt( cEncryptionTotalDhrts.Add( iInstance, tce, HrtHRTCount() - hrtStart ) ); return err; @@ -100,7 +124,7 @@ ErrOSUDecrypt( PERFOpt( cDecryptionCalls.Inc( iInstance, tce ) ); const HRT hrtStart = HrtHRTCount(); - const ERR err = ErrOSDecryptWithAes256( AES256_CAPI_IMPLEMENTATION, pbDataIn, pbDataOut, pcbDataLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); + const ERR err = ErrOSDecryptWithAes256( PARAM_AES256_IMPLEMENTATION, pbDataIn, pbDataOut, pcbDataLen, pfucbTable->pbEncryptionKey, pfucbTable->cbEncryptionKey ); PERFOpt( cDecryptionTotalDhrts.Add( iInstance, tce, HrtHRTCount() - hrtStart ) ); if ( err == JET_errDecryptionFailed ) diff --git a/dev/ese/src/ese/info.cxx b/dev/ese/src/ese/info.cxx index c7d6f571..06e4e4e8 100644 --- a/dev/ese/src/ese/info.cxx +++ b/dev/ese/src/ese/info.cxx @@ -1273,12 +1273,16 @@ ERR VTAPI ErrIsamSetTableInfo( FUCBRemoveEncryptionKey( pfucb ); if ( cbParam > 0 ) { - err = ErrOSEncryptionVerifyKey( AES256_CAPI_IMPLEMENTATION, (BYTE*)pvParam, cbParam ); + err = ErrOSEncryptionVerifyKey( PARAM_AES256_IMPLEMENTATION, (BYTE*)pvParam, cbParam ); if ( err < JET_errSuccess ) { AssertSz( fFalse, "Client is giving us a bad key" ); return err; } +#ifdef DEBUG + // On debug, also verify with the other implementation + CallS( ErrOSEncryptionVerifyKey( OTHER_AES256_IMPLEMENTATION, (BYTE*)pvParam, cbParam ) ); +#endif AllocR( pfucb->pbEncryptionKey = (BYTE*)PvOSMemoryHeapAlloc( cbParam ) ); memcpy( pfucb->pbEncryptionKey, pvParam, cbParam ); 
pfucb->cbEncryptionKey = cbParam; @@ -1606,7 +1610,7 @@ ERR VTAPI ErrIsamGetTableInfo( #ifdef DEBUG if ( pfucb->cbEncryptionKey > 0 ) { - ERR errT = ErrOSEncryptionVerifyKey( AES256_CAPI_IMPLEMENTATION, pfucb->pbEncryptionKey, pfucb->cbEncryptionKey ); + ERR errT = ErrOSEncryptionVerifyKey( PARAM_AES256_IMPLEMENTATION, pfucb->pbEncryptionKey, pfucb->cbEncryptionKey ); if ( errT < JET_errSuccess ) { AssertSz( fFalse, "Client should not have been able to save a bad encryption key" ); diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 5a924747..3181db37 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -7526,6 +7526,8 @@ const #define JET_paramFlight_RBSCleanupEnabledDEFAULT OnDebugOrRetail( fTrue, fFalse ) +#define JET_paramFlight_UseCngAes256ImplementationDEFAULT OnDebugOrRetail( fTrue, fFalse ) + // ================================================================ // The following file is auto-generated from sysparam.xml. // To modify or add parameters, edit sysparam.xml and run gengen.bat. 
@@ -9530,7 +9532,15 @@ LOCAL JET_ERR JET_API JetCreateEncryptionKeyEx( return ErrERRCheck( JET_errInvalidParameter ); } *pcbActual = cbKey; - return ErrOSCreateAes256Key( AES256_CAPI_IMPLEMENTATION, (BYTE*)pvKey, pcbActual ); + ERR err = ErrOSCreateAes256Key( PARAM_AES256_IMPLEMENTATION, (BYTE*)pvKey, pcbActual ); +#ifdef DEBUG + // On debug, verify that key works with the other implementation + if ( err >= JET_errSuccess ) + { + CallS( ErrOSEncryptionVerifyKey( OTHER_AES256_IMPLEMENTATION, (BYTE*)pvKey, *pcbActual ) ); + } +#endif + return err; } JET_ERR JET_API JetCreateEncryptionKey( diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index 6f0bd69e..4da5acbb 100644 --- a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -240,6 +240,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramEnableBlockCache, CJetParam::typeBoolean, 1, 1, 1, 0, 0, 1, 0), NORMAL_PARAM(JET_paramDeferredIndexPopulateRowsPerTransaction, CJetParam::typeInteger, 1, 0, 0, 0, 1, 2147483647, 1000), NORMAL_PARAM(JET_paramEnableBlockCacheDetach, CJetParam::typeBoolean, 1, 1, 1, 0, 0, 1, 0), + NORMAL_PARAM(JET_paramFlight_UseCngAes256Implementation, CJetParam::typeBoolean, 0, 1, 0, 0, 0, 1, JET_paramFlight_UseCngAes256ImplementationDEFAULT), ILLEGAL_PARAM(JET_paramMaxValueInvalid), }; @@ -467,4 +468,5 @@ static_assert( JET_paramPerfmonRefreshInterval == 217, "The order of defintion f static_assert( JET_paramEnableBlockCache == 218, "The order of defintion for JET_paramEnableBlockCache in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramDeferredIndexPopulateRowsPerTransaction == 219, "The order of defintion for JET_paramDeferredIndexPopulateRowsPerTransaction in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); static_assert( JET_paramEnableBlockCacheDetach == 220, "The order of defintion for JET_paramEnableBlockCacheDetach in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( JET_paramMaxValueInvalid == 221, "The order of defintion for JET_paramMaxValueInvalid in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramFlight_UseCngAes256Implementation == 221, "The order of defintion for JET_paramFlight_UseCngAes256Implementation in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramMaxValueInvalid == 222, "The order of defintion for JET_paramMaxValueInvalid in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 8c44620d..136250dd 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -5874,3 +5874,6 @@ INLINE ERR ErrFromCStatsErr( const CStats::ERR err ) return ErrERRCheck( JET_errInternalError ); } } + +#define PARAM_AES256_IMPLEMENTATION ( BoolParam( JET_paramFlight_UseCngAes256Implementation ) ? AES256_CNG_IMPLEMENTATION : AES256_CAPI_IMPLEMENTATION ) +#define OTHER_AES256_IMPLEMENTATION ( BoolParam( JET_paramFlight_UseCngAes256Implementation ) ? AES256_CAPI_IMPLEMENTATION : AES256_CNG_IMPLEMENTATION ) diff --git a/dev/ese/src/noncore/interop/params.h b/dev/ese/src/noncore/interop/params.h index 7ae5ce74..25ce8a68 100644 --- a/dev/ese/src/noncore/interop/params.h +++ b/dev/ese/src/noncore/interop/params.h @@ -242,7 +242,8 @@ MSINTERNAL enum class MJET_PARAM EnableBlockCache = 218, // Indicates that the ESE Block Cache is enabled. This is sufficient to access files previously attached to the ESE Block Cache but not to attach new files. 
DeferredIndexPopulateRowsPerTransaction = 219, // Number of primary index rows to process in a single transaction when processing EnableBlockCacheDetach = 220, // Indicates that ESE Block Cache detach is enabled. This will allow a file cached by the ESE Block Cache to be detached on open. - MaxValueInvalid = 221, // This is not a valid parameter. It can change from release to release! + Flight_UseCngAes256Implementation = 221, // Whether to use the CNG based implementation (rather than CAPI based one) for AES256 encryption + MaxValueInvalid = 222, // This is not a valid parameter. It can change from release to release! }; } From 2e23878cb0d79781588679ea479af874d8fe1f34 Mon Sep 17 00:00:00 2001 From: Build Team Date: Fri, 21 Oct 2022 16:54:08 +0000 Subject: [PATCH 072/102] This is a fast revert. The justification for the revert is: This change is causing crashes in inner ring. The reverted PR age: 20.01 hours old. [Substrate:6b856e64cde718dd81a22e63edc8335d6559e5a6] --- .../src/os/blockcache/_hashedlrukcache.hxx | 322 +++++++----------- .../_hashedlrukcachethreadlocalstorage.hxx | 97 ++---- 2 files changed, 147 insertions(+), 272 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 4acfcb19..a580dd07 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -138,12 +138,7 @@ class THashedLRUKCache m_cCachingFileIO( 0 ), m_iorl( this ), m_pfnIORangeLockAcquired( NULL ), - m_keyIORangeLockAcquired( NULL ), - m_fIORangeLockAcquired( fFalse ), - m_fIORequested( fFalse ), - m_pfnWaitForIOComplete( NULL ), - m_keyWaitForIOComplete( NULL ), - m_fWaitForIOCompleted( fFalse ) + m_keyIORangeLockAcquired( NULL ) { m_ilRequestsByIO.InsertAsPrevMost( this ); } @@ -159,7 +154,7 @@ class THashedLRUKCache ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } typename CHashedLRUKCachedFileTableEntry::CIORangeLockBase* Piorl() { 
return &m_iorl; } - BOOL FWaitForIOCompleted() const { return !m_fIORequested || m_fWaitForIOCompleted; } + BOOL FIOCompleted() const { return m_msIO.FEmpty(); } BOOL FCacheMiss() const { return m_fCacheMiss; } BOOL FCacheHit() const { return m_fCacheHit; } @@ -203,37 +198,9 @@ class THashedLRUKCache void WaitForIO( _In_opt_ CRequest::PfnIOComplete pfnIOComplete = NULL, _In_opt_ const DWORD_PTR keyIOComplete = NULL ) { - Assert( !m_fWaitForIOCompleted ); - IssueIO(); - if ( pfnIOComplete ) - { - m_pfnWaitForIOComplete = pfnIOComplete; - m_keyWaitForIOComplete = keyIOComplete; - - m_msIO.Partition( WaitForIOComplete_, DWORD_PTR( this ) ); - } - else - { - m_msIO.Partition(); - WaitForIOComplete(); - } - } - - void ResetWaitForIO() - { - Assert( FWaitForIOCompleted() ); - - if ( m_msIO.GroupActive() != 0 ) - { - m_msIO.Partition(); - } - - m_pfnWaitForIOComplete = NULL; - m_keyWaitForIOComplete = NULL; - m_fIORequested = fFalse; - m_fWaitForIOCompleted = fFalse; + m_msIO.Partition( pfnIOComplete, keyIOComplete ); } ERR ErrWriteCluster( _In_ ICachedBlockSlab* const pcbs, @@ -244,8 +211,6 @@ class THashedLRUKCache ERR err = JET_errSuccess; const CClusterWriteCompletionContext* pcwcc = PcwccGetClusterCompletionContext(); - Assert( !m_fWaitForIOCompleted ); - OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Cluster %s", OSFormatFileId( Pc() ), @@ -279,8 +244,6 @@ class THashedLRUKCache { ERR err = JET_errSuccess; - Assert( !m_fWaitForIOCompleted ); - OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Cluster %s", OSFormatFileId( Pc() ), @@ -307,8 +270,6 @@ class THashedLRUKCache ERR err = JET_errSuccess; BYTE* const pbData = (BYTE*)PbData() + ibOffset - Offsets().IbStart(); - Assert( !m_fWaitForIOCompleted ); - OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Block ib=%llu cb=%u", OSFormatFileId( Pc() ), @@ -331,8 +292,6 @@ class THashedLRUKCache ERR err = JET_errSuccess; 
const BYTE* const pbData = PbData() + ibOffset - Offsets().IbStart(); - Assert( !m_fWaitForIOCompleted ); - OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Block ib=%llu cb=%u", OSFormatFileId( Pc() ), @@ -409,10 +368,13 @@ class THashedLRUKCache void Start() override { - m_fIORequested = fTrue; - THashedLRUKCacheBase::CRequest::Start(); + if ( m_msIO.GroupActive() != 0 ) + { + m_msIO.Partition(); + } + const CMeteredSection::Group group = m_msIO.Enter(); Assert( group == 0 ); } @@ -511,8 +473,6 @@ class THashedLRUKCache { m_prequest->m_pfnIORangeLockAcquired( m_prequest->m_keyIORangeLockAcquired ); } - - m_prequest->m_fIORangeLockAcquired = fTrue; } private: @@ -596,25 +556,6 @@ class THashedLRUKCache } } - void WaitForIOComplete() - { - m_fWaitForIOCompleted = fTrue; - - if ( m_pfnWaitForIOComplete ) - { - m_pfnWaitForIOComplete( m_keyWaitForIOComplete ); - } - } - - static void WaitForIOComplete_( _In_ const DWORD_PTR keyWaitForIOComplete ) - { - const CMeteredSection::PFNPARTITIONCOMPLETE pfnPartitionComplete = WaitForIOComplete_; - Unused( pfnPartitionComplete ); - - CRequest* const prequest = (CRequest*)keyWaitForIOComplete; - prequest->WaitForIOComplete(); - } - private: const CClusterWriteCompletionContext m_rgcwcc[ 2 ]; @@ -630,11 +571,6 @@ class THashedLRUKCache CIORangeLock m_iorl; CRequest::PfnIORangeLockAcquired m_pfnIORangeLockAcquired; DWORD_PTR m_keyIORangeLockAcquired; - BOOL m_fIORangeLockAcquired; - BOOL m_fIORequested; - CRequest::PfnIOComplete m_pfnWaitForIOComplete; - DWORD_PTR m_keyWaitForIOComplete; - BOOL m_fWaitForIOCompleted; }; // Update Slab Visitor @@ -4881,7 +4817,6 @@ class THashedLRUKCache _In_ CRequest* const prequestIO ); void WaitForPendingIO( _In_ CRequest* const prequestIO ); BOOL FCompletedIO( _In_ CRequest* const prequestIO ); - void ClearIOCompletion( _In_ CRequest* const prequestIO ); void RequestFinalizeIO( _In_ CRequest* const prequestIO ); void RequestIO( _In_ CRequest* const 
prequestIO, @@ -8348,171 +8283,168 @@ void THashedLRUKCache::Issue() template void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls ) { - pctls->BeginAsyncIOWorker(); + BOOL fIORangeLockFailure = fFalse; + CRequest* prequestIONext = NULL; - while ( !pctls->FTryEndAsyncIOWorker() ) - { - BOOL fIORangeLockFailure = fFalse; - CRequest* prequestIONext = NULL; + pctls->BeginAsyncIOWorker(); - // for each issued IO, request an IO range lock in terms of the cached file. these IO range locks not only - // protect against chaotic concurrent IO to overlapping offsets but they also serialize all activity for that - // offset range including things like write back or moving cached blocks in the caching file + // for each issued IO, request an IO range lock in terms of the cached file. these IO range locks not only + // protect against chaotic concurrent IO to overlapping offsets but they also serialize all activity for that + // offset range including things like write back or moving cached blocks in the caching file - pctls->CritAsyncIOWorkerState().Enter(); - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); + pctls->CritAsyncIOWorkerState().Enter(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIOIssued().Next( prequestIO ); + { + prequestIONext = pctls->IlIOIssued().Next( prequestIO ); - if ( FWaitForIORangeLock( prequestIO, pctls ) ) - { - pctls->IlIOIssued().Remove( prequestIO ); - pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); - } - else - { - fIORangeLockFailure = fTrue; - } + if ( FWaitForIORangeLock( prequestIO, pctls ) ) + { + pctls->IlIOIssued().Remove( prequestIO ); + pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); } - pctls->CritAsyncIOWorkerState().Leave(); + else + { + fIORangeLockFailure = fTrue; + } + } + pctls->CritAsyncIOWorkerState().Leave(); 
- // determine which requested IO range locks have been acquired + // determine which requested IO range locks have been acquired - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIORangeLockPending().Next( prequestIO ); + { + prequestIONext = pctls->IlIORangeLockPending().Next( prequestIO ); - if ( prequestIO->Piorl()->FLocked() ) - { - pctls->IlIORangeLockPending().Remove( prequestIO ); - pctls->IlIORangeLocked().InsertAsNextMost( prequestIO ); - } + if ( prequestIO->Piorl()->FLocked() ) + { + pctls->IlIORangeLockPending().Remove( prequestIO ); + pctls->IlIORangeLocked().InsertAsNextMost( prequestIO ); } + } - // for each locked IO, request IO against the cached file and then the caching file. we do this to maximize - // our chances of IO optimization by the underlying file system implementation - // - // NOTE: RequestCachedFileIO / RequestCachingFileIO is touching slabs twice + // for each locked IO, request IO against the cached file and then the caching file. 
we do this to maximize + // our chances of IO optimization by the underlying file system implementation + // + // NOTE: RequestCachedFileIO / RequestCachingFileIO is touching slabs twice - while ( CRequest* prequestIO = pctls->IlIORangeLocked().PrevMost() ) - { - RequestCachedFileIO( prequestIO ); + while ( CRequest* prequestIO = pctls->IlIORangeLocked().PrevMost() ) + { + RequestCachedFileIO( prequestIO ); - pctls->IlIORangeLocked().Remove( prequestIO ); - pctls->IlCachedFileIORequested().InsertAsNextMost( prequestIO ); - } + pctls->IlIORangeLocked().Remove( prequestIO ); + pctls->IlCachedFileIORequested().InsertAsNextMost( prequestIO ); + } - while ( CRequest* prequestIO = pctls->IlCachedFileIORequested().PrevMost() ) - { - RequestCachingFileIO( prequestIO ); + while ( CRequest* prequestIO = pctls->IlCachedFileIORequested().PrevMost() ) + { + RequestCachingFileIO( prequestIO ); - pctls->IlCachedFileIORequested().Remove( prequestIO ); - pctls->IlCachingFileIORequested().InsertAsNextMost( prequestIO ); - } + pctls->IlCachedFileIORequested().Remove( prequestIO ); + pctls->IlCachingFileIORequested().InsertAsNextMost( prequestIO ); + } - // wait for all IO issued so far but asynchronously + // wait for all IO issued so far but asynchronously - while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) - { - WaitForPendingIOAsync( pctls, prequestIO ); + while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) + { + WaitForPendingIOAsync( pctls, prequestIO ); - pctls->IlCachingFileIORequested().Remove( prequestIO ); - pctls->IlIOPending().InsertAsNextMost( prequestIO ); - } + pctls->IlCachingFileIORequested().Remove( prequestIO ); + pctls->IlIOPending().InsertAsNextMost( prequestIO ); + } - // for each pending IO, check for any IOs that are complete + // for each pending IO, check for any IOs that are complete - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIOPending().PrevMost(); + prequestIONext = NULL; + for ( 
CRequest* prequestIO = pctls->IlIOPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIOPending().Next( prequestIO ); - - if ( FCompletedIO( prequestIO ) ) - { - pctls->IlIOPending().Remove( prequestIO ); - pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); + { + prequestIONext = pctls->IlIOPending().Next( prequestIO ); - ClearIOCompletion( prequestIO ); - } + if ( FCompletedIO( prequestIO ) ) + { + pctls->IlIOPending().Remove( prequestIO ); + pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); } + } - // for each completed IO, finalize the IO in the cache + // for each completed IO, finalize the IO in the cache - while ( CRequest* prequestIO = pctls->IlIOCompleted().PrevMost() ) - { - RequestFinalizeIO( prequestIO ); + while ( CRequest* prequestIO = pctls->IlIOCompleted().PrevMost() ) + { + RequestFinalizeIO( prequestIO ); - pctls->IlIOCompleted().Remove( prequestIO ); - pctls->IlFinalizeIORequested().InsertAsNextMost( prequestIO ); - } + pctls->IlIOCompleted().Remove( prequestIO ); + pctls->IlFinalizeIORequested().InsertAsNextMost( prequestIO ); + } - // wait for all IO issued so far but asynchronously + // wait for all IO issued so far but asynchronously - while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) - { - WaitForPendingIOAsync( pctls, prequestIO ); + while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) + { + WaitForPendingIOAsync( pctls, prequestIO ); - pctls->IlFinalizeIORequested().Remove( prequestIO ); - pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); - } + pctls->IlFinalizeIORequested().Remove( prequestIO ); + pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); + } - // for each pending finalize IO, check for any IOs that are complete + // for each pending finalize IO, check for any IOs that are complete - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlFinalizeIOPending().PrevMost(); + prequestIONext = NULL; + for ( 
CRequest* prequestIO = pctls->IlFinalizeIOPending().PrevMost(); prequestIO; prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); + { + prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); - if ( FCompletedIO( prequestIO ) ) - { - pctls->IlFinalizeIOPending().Remove( prequestIO ); - pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); - } + if ( FCompletedIO( prequestIO ) ) + { + pctls->IlFinalizeIOPending().Remove( prequestIO ); + pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); } + } - // for each finalized IO, release the IO range locks and trigger the IO completion + // for each finalized IO, release the IO range locks and trigger the IO completion - while ( CRequest* prequestIO = pctls->IlFinalizeIOCompleted().PrevMost() ) - { - pctls->IlFinalizeIOCompleted().Remove( prequestIO ); + while ( CRequest* prequestIO = pctls->IlFinalizeIOCompleted().PrevMost() ) + { + pctls->IlFinalizeIOCompleted().Remove( prequestIO ); - ReleaseIORangeLock( prequestIO, pctls ); + ReleaseIORangeLock( prequestIO, pctls ); - CRequest* prequestNext = NULL; - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestNext ) - { - prequestNext = prequestIO->IlRequestsByIO().Next( prequest ); + CRequest* prequestNext = NULL; + for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); + prequest; + prequest = prequestNext ) + { + prequestNext = prequestIO->IlRequestsByIO().Next( prequest ); - prequestIO->IlRequestsByIO().Remove( prequest ); + prequestIO->IlRequestsByIO().Remove( prequest ); - if ( prequest != prequestIO ) - { - pctls->RemoveRequest( prequest ); - } + if ( prequest != prequestIO ) + { + pctls->RemoveRequest( prequest ); } - - pctls->RemoveRequest( prequestIO ); } - // if we failed to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again + pctls->RemoveRequest( prequestIO ); + } + + // if we failed 
to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again - if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) - { - pctls->CueAsyncIOWorker(); - } + if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) + { + pctls->CueAsyncIOWorker(); } + + pctls->EndAsyncIOWorker(); } template @@ -8534,7 +8466,6 @@ ERR THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // wait for all the IO to complete even if it is not needed for finalization WaitForPendingIO( prequest ); - ClearIOCompletion( prequest ); // finalize the IO in the cache @@ -8767,7 +8698,7 @@ BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) prequest; prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) { - if ( !prequest->FWaitForIOCompleted() ) + if ( !prequest->FIOCompleted() ) { return fFalse; } @@ -8776,17 +8707,6 @@ BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) return fTrue; } -template -void THashedLRUKCache::ClearIOCompletion( _In_ CRequest* const prequestIO ) -{ - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) - { - prequest->ResetWaitForIO(); - } -} - template void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx index 62a0d80b..4a2a1b14 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx @@ -17,7 +17,8 @@ class CHashedLRUKCacheThreadLocalStorage // ctls : CCacheThreadLocalStorageBase( ctid ), m_pc( NULL ), m_ptpwIssue( NULL ), - m_cwAsyncIOWorkerState( ControlWord::cwNone ), + m_semAsyncIOWorkerRequest( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_semAsyncIOWorkerRequest" ) ), + m_semAsyncIOWorkerExecute( 
CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_semAsyncIOWorkerExecute" ) ), m_ctidAsyncIOWorker( ctidInvalid ), m_critAsyncIOWorkerState( CLockBasicInfo( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_critAsyncIOWorkerState" ), rankIssued, 0 ) ), m_rgibSlab { 0 }, @@ -26,6 +27,9 @@ class CHashedLRUKCacheThreadLocalStorage // ctls m_cIORangeLocked( 0 ), m_cbIORangeLocked( 0 ) { + m_semAsyncIOWorkerRequest.Release(); + m_semAsyncIOWorkerRequest.Release(); + m_semAsyncIOWorkerExecute.Release(); } void Initialize( _In_ THashedLRUKCache* const pc, _Inout_ TP_WORK** const pptpwIssue ) @@ -182,73 +186,36 @@ class CHashedLRUKCacheThreadLocalStorage // ctls void BeginAsyncIOWorker() { - m_ctidAsyncIOWorker = CtidCurrentThread(); + // serialize execution of the async IO worker because more than one can be requested and executing concurrently - OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s BeginAsyncIOWorker .%x", - OSFormatFileId( Pc() ), - Ctid() ) ); + m_semAsyncIOWorkerExecute.Acquire(); + m_ctidAsyncIOWorker = CtidCurrentThread(); } - BOOL FTryEndAsyncIOWorker() + void EndAsyncIOWorker() { - BOOL fNewRequest = fFalse; + // enable another async IO worker task to execute - // if the worker is requested and running then clear the requested state - // if the worker is not requested and running then clear the running state + m_ctidAsyncIOWorker = ctidInvalid; + m_semAsyncIOWorkerExecute.Release(); - OSSYNC_FOREVER - { - const ControlWord cwAsyncIOWorkerStateBIExpected = (ControlWord)AtomicRead( (LONG*)&m_cwAsyncIOWorkerState ); - const ControlWord cwAsyncIOWorkerStateAI = cwAsyncIOWorkerStateBIExpected == ControlWord::cwRunning ? 
- ControlWord::cwNone : - ControlWord::cwRunning; - const ControlWord cwAsyncIOWorkerStateBI = (ControlWord)AtomicCompareExchange( (LONG*)&m_cwAsyncIOWorkerState, - (LONG)cwAsyncIOWorkerStateBIExpected, - (LONG)cwAsyncIOWorkerStateAI ); - - if ( cwAsyncIOWorkerStateBI == cwAsyncIOWorkerStateBIExpected ) - { - fNewRequest = cwAsyncIOWorkerStateBI == ControlWord::cwRequestedAndRunning; - break; - } - } + // allow another async IO worker request to be made - OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s FTryEndAsyncIOWorker .%x = %s", - OSFormatFileId( Pc() ), - Ctid(), - OSFormatBoolean( fNewRequest ) ) ); + m_semAsyncIOWorkerRequest.Release(); - // if we are no longer running then release the ref count for the async IO worker + // release the ref count for this request - if ( !fNewRequest ) - { - CHashedLRUKCacheThreadLocalStorage* pctlsT = this; - Release( &pctlsT ); - } - - // we have succeeded in ending if there is no new request - - return !fNewRequest; + CHashedLRUKCacheThreadLocalStorage* pctlsT = this; + Release( &pctlsT ); } void CueAsyncIOWorker() { - // request the async IO worker - - const ControlWord cwAsyncIOWorkerStateBI = (ControlWord)AtomicExchange( (LONG*)&m_cwAsyncIOWorkerState, - (LONG)ControlWord::cwRequestedAndRunning ); - - const BOOL fNewRequest = ( cwAsyncIOWorkerStateBI == ControlWord::cwNone || - cwAsyncIOWorkerStateBI == ControlWord::cwRunning ); - const BOOL fSignalNeeded = cwAsyncIOWorkerStateBI == ControlWord::cwNone; + // try to get a token to request the async IO worker - // signal the async IO worker if necessary - - if ( fSignalNeeded ) + if ( m_semAsyncIOWorkerRequest.FTryAcquire() ) { - // add a ref count for the async IO worker + // add a ref count for this request AddRef(); @@ -256,13 +223,6 @@ class CHashedLRUKCacheThreadLocalStorage // ctls SubmitThreadpoolWork( PtpwIssue() ); } - - OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s CueAsyncIOWorker .%x fNewRequest = %s fSignalNeeded = %s", - 
OSFormatFileId( Pc() ), - Ctid(), - OSFormatBoolean( fNewRequest ), - OSFormatBoolean( fSignalNeeded ) ) ); } static void CueAsyncIOWorker_( _In_ const DWORD_PTR keyIOComplete ) @@ -281,26 +241,21 @@ class CHashedLRUKCacheThreadLocalStorage // ctls ~CHashedLRUKCacheThreadLocalStorage() { + m_semAsyncIOWorkerRequest.Acquire(); + m_semAsyncIOWorkerRequest.Acquire(); + m_semAsyncIOWorkerExecute.Acquire(); + m_pc->ReleaseThreadpoolState( &m_ptpwIssue ); } - private: - - enum class ControlWord : LONG - { - cwNone = 0, - cwRequested = 1, - cwRunning = 2, - cwRequestedAndRunning = 3, - }; - private: THashedLRUKCache* m_pc; TP_WORK* m_ptpwIssue; CCountedInvasiveList m_ilIORequested; - volatile ControlWord m_cwAsyncIOWorkerState; + CSemaphore m_semAsyncIOWorkerRequest; + CSemaphore m_semAsyncIOWorkerExecute; CacheThreadId m_ctidAsyncIOWorker; CCountedInvasiveList m_ilIORangeLockPending; CCountedInvasiveList m_ilIORangeLocked; From 14c01100fae4d3cf3a4791e3ef336f24d420e379 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 21 Oct 2022 18:36:46 +0000 Subject: [PATCH 073/102] ESE Block Cache: fix the durability of IFileAPI::ErrFlushFileBuffers The current working theory is that this lost write is actually a lost reverted page that happened due to a sequence like this: - Begin revert and proceed up to where we revert pages - Revert pages in the EDB - Flush the FM - Flush the EDB - Update the persisted revert checkpoint to JET_revertstateCopingLogs - HA process restarts - some of the page reverts are rolled back when the EBC mounts - Restart and complete the revert from JET_revertstateCopingLogs - Recovery begins - Recovery fails with a lost write The flush of the EDB maps to a call to IFileAPI::ErrFlushFileBuffers which EBC maps to a call to THashedLRUKCache::ErrFlush. This is currently implemented as a call to flush the EBC's journal. An EBC journal entry must be explicitly indicated as durable by another subsequent journal entry to be guaranteed to be durable. 
This is achieved by flushing the journal again because the subsequent journal entry will indicate that the previous one was considered durable. If this doesn't happen then the journal entry can be lost on the next mount. We believe that this is what happened in this case. The fix is to change THashedLRUKCache::ErrFlush to flush the EBC journal twice. This change also changes ErrPrepareToDismount to use ErrFlush directly which effectively checks that it is working properly. If it doesn't then we will end up emitting telemetry (HashedLRUKCacheFlushAllStateIncomplete) when the file is closed. We also fix ErrPrepareToDismount's journal truncation to also be durable for write back to guarantee that we don't redo recovery of state that should already be flushed. [Substrate:4f6edd6e4b0d9b4a606c398335845fbd2d40b2b1] --- .../src/os/blockcache/_hashedlrukcache.hxx | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index a580dd07..06ed89b1 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -5326,32 +5326,26 @@ HandleError: return err; } - template< class I > ERR THashedLRUKCache::ErrPrepareToDismount() { ERR err = JET_errSuccess; JournalPosition jposReplay = jposInvalid; JournalPosition jposDurableForWriteBack = jposInvalid; - JournalPosition jposDurable = jposInvalid; - // flush our state for all files + // durable flush all cached files Call( ErrFlush() ); - // flush all our state + // flush all our state and durably truncate the journal - Call( m_pj->ErrGetProperties( &jposReplay, &jposDurableForWriteBack, &jposDurable, NULL, NULL ) ); - if ( jposDurableForWriteBack < jposDurable ) - { - Call( ErrFlush() ); - Call( m_pj->ErrGetProperties( &jposReplay, &jposDurableForWriteBack, &jposDurable, NULL, NULL ) ); - } - if ( jposReplay < jposDurableForWriteBack ) + Call( 
m_pj->ErrGetProperties( &jposReplay, &jposDurableForWriteBack, NULL, NULL, NULL ) ); + if ( rounddn( (QWORD)jposReplay, cbJournalSegment ) < rounddn( (QWORD)jposDurableForWriteBack, cbJournalSegment ) ) { Call( ErrFlushAllState( jposDurableForWriteBack, fFalse ) ); Call( m_pj->ErrTruncate( jposDurableForWriteBack ) ); - Call( ErrFlush() ); + Call( m_pj->ErrFlush() ); + Call( m_pj->ErrFlush() ); } HandleError: @@ -8042,9 +8036,12 @@ ERR THashedLRUKCache::ErrFlush() { ERR err = JET_errSuccess; - // flush the journal + // flush the journal twice to ensure all updates are durable for write back and will survive a restart + // + // NOTE: see the comment in TJournalSegmentManager::ErrFindLastSegmentWithBinarySearch for an explanation Call( m_pj->ErrFlush() ); + Call( m_pj->ErrFlush() ); HandleError: return err; From b66944a23fffe6d725e7e07f1c7b88c6d1e99506 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Fri, 21 Oct 2022 23:36:57 +0000 Subject: [PATCH 074/102] Couple OACR fixes [Substrate:0e6653db1c115923f9d0a0e2ae7d9fe5a77e6769] --- dev/ese/src/ese/cpage.cxx | 2 +- dev/ese/src/ese/repair.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index d2e39137..17012df8 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -4720,7 +4720,7 @@ ERR CPAGE::ErrCheckPage( const ULONG_PTR pbPageDataEnd = PbDataEnd_(); // m_bfl.pv + CbBuffer() - tag array size (off itagMicFree) if ( pbPageDataEnd <= ( pbPageDataStart + 1 /* generous 1 byte in data section, tighter check next */ ) ) { - MakeCorruptionDetailsSz( L"itagMicFree / tag array too large, overlapping PGHDR (%p,%p,%I64d / %d)", pbPageDataEnd, pbPageDataStart, ctags, CbTagArray_() ); + MakeCorruptionDetailsSz( L"itagMicFree / tag array too large, overlapping PGHDR (%p,%p,%I64d / %d)", (PVOID)pbPageDataEnd, (PVOID)pbPageDataStart, ctags, CbTagArray_() ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); Error( ErrCaptureCorruptedPageInfoSz( 
mode, L"TagArrayWalkingOntoPghdr", wszCorruptionDetails, fLogEvent ) ); } diff --git a/dev/ese/src/ese/repair.cxx b/dev/ese/src/ese/repair.cxx index 5ae17513..f744915e 100644 --- a/dev/ese/src/ese/repair.cxx +++ b/dev/ese/src/ese/repair.cxx @@ -7167,7 +7167,7 @@ LOCAL ERR ErrREPAIRICheck( { if ( !csr.Cpage().FInvisibleSons() ) { - (*popts->pcprintfError)( "page %d: not an internal page\r\n" ); + (*popts->pcprintfError)( "page %d: not an internal page\r\n", csr.Pgno() ); Call( ErrERRCheck( JET_errDatabaseCorrupted ) ); } From 074cdf59a471af635db86198c09b42db0d24c648 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Mon, 24 Oct 2022 15:48:33 +0000 Subject: [PATCH 075/102] [Redo] A slew of JetInit and Failure Item diagnostic enhancement Changes: - Add new START_INSTANCE_FAILED_ID / Event ID 109 for JetInit() failures all up. - Add INST tracking to see if Failure Items were thrown on this instance for that event. - Track all failed JetInit() calls in a new .IRS.RAW file as well, so they are not missed. - Promote some cpage errors to corruption failure items. - Tweak some other failure items in logredo.cxx that seem wrong. - [Disabled] Add diagnostics for missed failure items. Note: this tracking went off severely, needs more debugging, checking it in #ifdef'd out. - Make LastThrow only track negative error values (trying to enhance probability that error at end of a JET API is what caused it to fail). - Also enhanced !ese dump dbfilehdr to read the DB header off the disk, instead of pulling it from memory. - Added a sev3 / email Alert monitor if we failed JetInit with a corruption failure item. - Added optics parsing to break out the JetInit failure to Activating versus Replay, and then for Corruption category - if we triggered an HA failure item or not. 
[Substrate:27d063d4395b15947fe3ea9b031f634f243a628c] --- dev/ese/published/inc/os/error.hxx | 11 +- dev/ese/published/inc/os/hapublish.hxx | 5 + dev/ese/src/_res/jetmsg.mc | 11 + dev/ese/src/ese/_log/logredo.cxx | 10 +- dev/ese/src/ese/_osu/hapublishu.cxx | 89 ++++- dev/ese/src/ese/cpage.cxx | 9 - dev/ese/src/ese/jetapi.cxx | 480 +++++++++++++++++++++++-- dev/ese/src/ese/jettest.cxx | 10 +- dev/ese/src/ese/sysinit.cxx | 10 + dev/ese/src/inc/daedef.hxx | 7 +- dev/ese/src/inc/jettest.hxx | 3 + dev/ese/src/inc/log.hxx | 4 + dev/ese/src/os/edbg.cxx | 99 ++++- 13 files changed, 686 insertions(+), 62 deletions(-) diff --git a/dev/ese/published/inc/os/error.hxx b/dev/ese/published/inc/os/error.hxx index 08327908..0437a6cf 100644 --- a/dev/ese/published/inc/os/error.hxx +++ b/dev/ese/published/inc/os/error.hxx @@ -523,12 +523,6 @@ public: __forceinline CErrFrameSimple * PefLastThrow(); -__forceinline ERR ErrERRSetLastThrow( _In_ const CHAR* szFile, _In_ const LONG lLine, _In_ const ERR err ) -{ - PefLastThrow()->Set( szFile, lLine, err ); - return err; -} - // Returns the line of the last call that failed out w/ an error, presumably within this frame. 
ULONG UlLineLastCall(); @@ -569,7 +563,10 @@ ERR ErrERRCheck_( const ERR err, const CHAR* szFile, const LONG lLine ); __forceinline ERR ErrERRCheck_( _In_ const ERR err, _In_ const CHAR* szFile, _In_ const LONG lLine ) { extern ERR g_errTrap; - PefLastThrow()->Set( szFile, lLine, err ); + if ( err < 0 /* JET_errSuccess */ ) + { + PefLastThrow()->Set( szFile, lLine, err ); + } if ( g_errTrap == err ) { KernelDebugBreakPoint(); diff --git a/dev/ese/published/inc/os/hapublish.hxx b/dev/ese/published/inc/os/hapublish.hxx index 93929b87..a1118032 100644 --- a/dev/ese/published/inc/os/hapublish.hxx +++ b/dev/ese/published/inc/os/hapublish.hxx @@ -8,6 +8,11 @@ #include #include "exdbmsg_ese.h" +const DWORD bitHaPublishedEvent = 0x1; +const DWORD bitHaPublishedCorruptionTag = 0x2; +const DWORD bitHaPublishedIoHardTag = 0x4; +const DWORD bitHaPublishedLogLogicallyInconsistentTag = 0x2; + #define Ese2HaId( id ) ( HADBFAILURE_EVENT_RANGE_START_ESE + ( id ) ) #define OSUHAPublishEvent( p0, p1, p2, p3, p4, p5, p6, p7, p8, p9 ) \ diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index fc64cb61..69a4ed34 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -215,6 +215,17 @@ Language=English %1 (%2) %3The specific ESE configuration store is locked in a read inhibit state, clear the %1 registry value to enable ESE to continue and utilize the config store. . +MessageId=109 +SymbolicName=START_INSTANCE_FAILED_ID +Language=English +%1 (%2) %3The database engine failed to start instance (%4) due to error %5. (Time=%6 seconds) +%n +Failure Details:%n +Mode: %7%n +Fail Address: %8%n +Publishing: %9%n +. + ;// You are almost assuredly not adding in the right place? 
diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 793c36cd..e0124328 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -3570,6 +3570,10 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) Assert( (ULONG)data.Cb() == cbNewData ); if ( (ULONG)data.Cb() != cbNewData ) { + // per analysis of a real world case, it is hard to imagine how local (passive) data or remote (active) + // database data generated this incorrectness. This is literally saying the ib/cb pairs do NOT add up + // to the final record size (from the active). This almost assuredly means that there was a corruption + // of the actual log record data. Or a bug in our diff creation or reconstruction alg. OSUHAEmitFailureTag( m_pinst, HaDbFailureTagLogLogicallyInconsistent, L"a3cb57b9-8ba1-496d-a6fc-4fc2f0140fc4" ); Error( ErrERRCheck( JET_errLogCorrupted ) ); } @@ -5283,7 +5287,7 @@ ERR LOG::ErrLGRICheckRedoAttachDb( goto HandleError; } } - else if ( JET_errReadVerifyFailure == err ) + else if ( FErrIsDbCorruption( err ) ) { reason = eDARHeaderCorrupt; if ( pfmp->FIgnoreDeferredAttach() ) @@ -5293,8 +5297,8 @@ ERR LOG::ErrLGRICheckRedoAttachDb( } else { - // the log file header is corrupt - OSUHAEmitFailureTag( m_pinst, HaDbFailureTagRecoveryRedoLogCorruption, L"9106f5c1-2f93-479b-a12a-c93c6ab3de68" ); + // the DB file header is corrupt + OSUHAEmitFailureTag( m_pinst, HaDbFailureTagCorruption, L"9106f5c1-2f93-479b-a12a-c93c6ab3de68" ); goto HandleError; } } diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index e0d0cdad..4a564121 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -4,6 +4,8 @@ #include "osustd.hxx" #include "esestd.hxx" + + // The current layer is high enough to understand INST internal. 
// Extracing m_wszInstanceName and m_wszDisplayName was // not possible down inside OS layer @@ -20,8 +22,25 @@ void OSUHAPublishEvent_( DWORD cParameter, const WCHAR** rgwszParameter ) { + BOOL fEmit = fTrue; + // failure events need not be published if there is no instance - if ( pinstNil != pinst && UlParam( pinst, JET_paramEnableHaPublish ) ) + // update: why? + if ( pinst == NULL || pinstNil == pinst ) + { +#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + FireWall( "SkipFi2NoInst" ); +#endif + fEmit = fFalse; + } + + if ( !UlParam( pinst, JET_paramEnableHaPublish ) ) + { + // might be nice to Assert/FireWall not O365 Datacenter / Store.worker, but a bit of a layer violation + fEmit = fFalse; + } + + if ( fEmit ) { OSUHAPublishEventImpl( haTag, pinst->m_wszInstanceName, @@ -131,10 +150,42 @@ void OSUHAEmitFailureTag_( } } + // FUTURE: HA Publish is only for O365 datacenter, but even so this is a bit of a layering violation. We will + // add these temporarily to do a basic health check on O365 to see if we're dropping HA FailureItems from any ESE + // code paths. + const BOOL fO365StoreWorker = ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Worker" ) == 0 ); + const BOOL fO365DatacenterProcess = + fO365StoreWorker || + ( _wcsicmp( WszUtilProcessName(), L"MSExchangeRepl" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"EdgeTransport" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.DxStore.HA.Instance" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.SharedCache" ) == 0 ) || + ( _wcsicmp( WszUtilProcessName(), L"Microsoft.Exchange.Store.Service" ) == 0 ); // calls JET APIs, but should not actually start ese inst + // should we add eseutil? 
+ // if the instance pointer is NULL then do not emit an event // if ( !pinstActual ) { +#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + if ( !FInEmbeddedUnitTest() ) + { + if ( haTag != HaDbFailureTagMemory ) + { + CHAR szTag[60]; + OSStrFormatA( szTag, sizeof( szTag ), "SkipFiNoInstProvided-%d", haTag ); + FireWall( szTag ); + } + else if ( !FOSLayerUp() ) + { + FireWall( "SkipFiNoInstAllocBeforeOsInit" ); + } + else + { + FireWall( "SkipFiNoInstAllocVictimNotFoundOrAcquired" ); + } + } +#endif fEmit = fFalse; } @@ -142,6 +193,12 @@ void OSUHAEmitFailureTag_( // if ( pinstActual && !UlParam( pinstActual, JET_paramEnableHaPublish ) ) { +#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + if ( fO365StoreWorker ) + { + FireWall( "SkipFiHaPublishOff" ); + } +#endif fEmit = fFalse; } @@ -151,6 +208,13 @@ void OSUHAEmitFailureTag_( ( !pinstActual->m_wszInstanceName || !pinstActual->m_wszInstanceName[ 0 ] || !pinstActual->m_wszDisplayName || !pinstActual->m_wszDisplayName[ 0 ] ) ) { + // many test processes have this off, but all real ESE instances should be correctly identified. 
+#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + if ( fO365DatacenterProcess ) + { + FireWall( "SkipFiNoInstOrDispName" ); + } +#endif fEmit = fFalse; } @@ -158,6 +222,9 @@ void OSUHAEmitFailureTag_( // if ( haTag == HaDbFailureTagNoOp ) { +#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + FireWall( "SkipFiTagNoOp" ); +#endif fEmit = fFalse; } @@ -165,6 +232,9 @@ void OSUHAEmitFailureTag_( // if ( !wszGuid || !wszGuid[ 0 ] ) { +#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + FireWall( "SkipFiNoGuid" ); +#endif fEmit = fFalse; } @@ -176,6 +246,9 @@ void OSUHAEmitFailureTag_( HA_NOOP_FAILURE_TAG_ID + msgidOffset <= HA_NOOP_FAILURE_TAG_ID || HA_NOOP_FAILURE_TAG_ID + msgidOffset > HA_MAX_FAILURE_TAG_ID ) { +#ifdef ENABLE_MISSED_FAILURE_ITEM_TRACKING + FireWall( "SkipFiEvtOutOfRange" ); +#endif fEmit = fFalse; } @@ -214,6 +287,20 @@ void OSUHAEmitFailureTag_( HA_NOOP_FAILURE_TAG_ID + msgidOffset, iwsz, rgwsz ); + + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedEvent ); + if ( haTag == HaDbFailureTagCorruption ) + { + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedCorruptionTag ); + } + if ( haTag == HaDbFailureTagIoHard ) + { + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedIoHardTag ); + } + if ( haTag == HaDbFailureTagLogLogicallyInconsistent ) + { + AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedLogLogicallyInconsistentTag ); + } } // cleanup diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index 17012df8..a18a036e 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -4912,20 +4912,14 @@ ERR CPAGE::ErrCheckPage( // The whole line is starting above the data start, i.e. off the data section, and possibly even off page. 
MakeCorruptionDetailsSz( L"TAG %d computed offset starts too high (ib=%d, cb=%d, %p > %p)", itag, ib, cb, line.pv, pbPageDataEnd ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); - PageAssertTrack( *this, fFalse, "LineEntirelyAboveDataSection" ); -#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, L"LineEntirelyAboveDataSection", wszCorruptionDetails, fLogEvent ) ); -#endif } if ( pbLineLastByte > pbPageDataEnd ) { // The line ends above the data start, i.e. off the data section, but does start / overlaping in valid data section. MakeCorruptionDetailsSz( L"TAG %d computed offset starts too high (ib=%d, cb=%d, %p > %p)", itag, ib, cb, pbLineLastByte, pbPageDataEnd ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); - PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "LineEndsAboveDataSection" ); -#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, L"LineEndsAboveDataSection", wszCorruptionDetails, fLogEvent ) ); -#endif } if ( errGetLine < JET_errSuccess || !FOnData( line.pv, line.cb ) ) @@ -4937,10 +4931,7 @@ ERR CPAGE::ErrCheckPage( MakeCorruptionDetailsSz( L"UNCAUGHT: TAG %d ErrGetPtr() failed or got line off page (ib=%d, cb=%d, err=%d,f=%d).", itag, ib, cb, errGetLine, FOnData( line.pv, line.cb ) ); (*pcprintf)( "%ws\r\n", wszCorruptionDetails ); // there should not be too many errors coming from ErrGetLine() that we can't embed the err in the corruption type. 
- PageAssertTrack( *this, FNegTest( fCorruptingPageLogically ), "GetLineFailed:%d\n", errGetLine ); -#ifdef DEBUG Error( ErrCaptureCorruptedPageInfoSz( mode, wszGetLineErr, wszCorruptionDetails, fLogEvent ) ); -#endif } // do some simple KEYDATAFLAGS checks diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 3181db37..7c030a68 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -6,6 +6,9 @@ #include "errdata.hxx" #include "_bf.hxx" // for JetTestHook +// ISSUE-2014/09/12-BrettSh - I personally view this as a layering violation of stuff that should be +// contained in the OS layer, as it is specific to the Windows OS, and as used today actually only +// affects phone. #ifdef ESENT #include "slpolicylist.h" #else @@ -56,7 +59,7 @@ Instructions for adding a JET param 2) Modify sysparam.xml to create a defintion for the JET param 3) Run gengen.bat to update the source files. -- ------ ManagedEsent Layer ------- - A) Update manually. // [2014/08/09 - SOMEONE]: Support for auto-generated ManagedEsent params will be added soon. + A) Update manually. // [2014/08/09 - UmairA]: Support for auto-generated ManagedEsent params will be added soon. */ JET_ERR ErrERRLookupErrorCategory( @@ -1358,6 +1361,7 @@ VOID INST::TraceStationId( const TraceStationIdentificationReason tsidr ) return; } + // FUTURE-2017/08/02-BrettSh - also consider adding some critical or ?ALL? params to an additional trace? ETInstStationId( tsidr, m_iInstance, (BYTE)m_perfstatusEvent, m_wszInstanceName, m_wszDisplayName ); } @@ -1393,6 +1397,9 @@ __range( 0, g_ifmpMax * ( cchPerfmonInstanceNameMax + 1 ) + 1 ) ULONG g_cchDatab BYTE* g_rgbDatabaseAggregationIDs = NULL; +// ISSUE-2009/10/21-BrettSh - This used to be static, but needed it in osu.cxx. Would +// love to get this static again, or have a cleaner init/term story for perfmon and +// these instance names. 
INT g_cInstances = 0; INT g_cDatabases = 0; @@ -1549,6 +1556,92 @@ VOID PERFSetDatabaseNames( IFileSystemAPI* const pfsapi ) } +#ifdef ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS + +// +// Trace to an IRS.RAW the init failure. +// + +void DumpFailedInitToIrsRaw( + _In_ INST * pinst, + _In_ PCWSTR wszInstDisplayName, + _In_ PCWSTR wszErrorState, + _In_ PCWSTR wszSeconds, + _In_ PCWSTR wszFailingMode, + _In_ PCWSTR wszFailingAddress, + _In_ PCWSTR wszHaPublishingFacts ) +{ + __int64 fileTime; + WCHAR wszDate[32]; + WCHAR wszTime[32]; + size_t cchRequired; + WCHAR wszInstIrsFile[ 5 /* Inst- */ + 3 /* inst log base name */ + 1 ]; + WCHAR wszInstIrsPathBase[ OSFSAPI_MAX_PATH ]; + CPRINTF * pcprintfPageTrace = NULL; + + if ( pinst == NULL || pinst->m_pfsapi == NULL ) + { + FireWall( "InstIrsUnexpectedInitExitBeforeInstOrPfsapiAlloc" ); + return; + } + + if ( ( SzParam( pinst, JET_paramLogFilePath ) == NULL ) || + ( SzParam( pinst, JET_paramLogFilePath )[0] == L'\0' ) ) + { + FireWall( "InstIrsLogPathNotSet" ); + return; + } + + if ( ( SzParam( pinst, JET_paramBaseName ) == NULL ) || + ( SzParam( pinst, JET_paramBaseName )[0] == L'\0' ) ) + { + FireWall( "InstIrsBaseNameNotSet" ); + return; + } + + // make path + // + OSStrCbFormatW( wszInstIrsFile, sizeof( wszInstIrsFile ), L"Inst-%ws", SzParam( pinst, JET_paramBaseName ) ); + ERR errT = pinst->m_pfsapi->ErrPathBuild( + SzParam( pinst, JET_paramLogFilePath ), + wszInstIrsFile, + L"", // ext filled by IRS func / ErrBeginDatabaseIncReseedTracing() + wszInstIrsPathBase, + sizeof( wszInstIrsPathBase ) ); + if ( errT < JET_errSuccess ) + { + FireWall( "InstIrsPathBuildFail" ); + return; + } + + // start tracing (before anything else) + // + errT = ErrBeginDatabaseIncReseedTracing( pinst->m_pfsapi, wszInstIrsPathBase, &pcprintfPageTrace ); + if ( errT < JET_errSuccess ) + { + AssertSzRTL( FRFSAnyFailureDetected(), "InstIrsFailedIrsOpen" ); + return; + } + + fileTime = UtilGetCurrentFileTime(); + 
ErrUtilFormatFileTimeAsTimeWithSeconds( fileTime, wszTime, _countof(wszTime), &cchRequired); + ErrUtilFormatFileTimeAsDate( fileTime, wszDate, _countof(wszDate), &cchRequired); + (*pcprintfPageTrace)( "Begin " __FUNCTION__ "() @ Time %ws %ws\r\n", wszTime, wszDate ); + + // Consider adding ERRFormatIssueSource() to get last error information and Server Version. + (*pcprintfPageTrace)( "JetInit (%ws) Failed with %ws in %ws seconds.\r\n", wszInstDisplayName, wszErrorState, wszSeconds ); + (*pcprintfPageTrace)( "Failing Mode: %ws\r\n", wszFailingMode ); + (*pcprintfPageTrace)( "Failing Address: %ws\r\n", wszFailingAddress ); + (*pcprintfPageTrace)( "HA Pub Facts: %ws\r\n", wszHaPublishingFacts ); + + EndDatabaseIncReseedTracing( &pcprintfPageTrace ); + + return; +} + +#endif // ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS + + // // CIsamSequenceDiagLog // @@ -1683,6 +1776,10 @@ void CIsamSequenceDiagLog::Trigger( _In_ const BYTE seqTrigger ) // the time adjustments - it is sort of a little ambigious, does the waits we accumulated // at m_cseqMac - 1 belong there, or at the new seqTrigger array element? So we'll assert // if jumping by more than one sequence value, that there are no time adjustments lingering. + // ISSUE-2016/12/08-BrettSh - On final review realized this is not quite complete, we may + // have skipped two or more steps ... so we should walk back checking all untriggered steps + // for this, until we hit a triggered step (which would be allowed to have callbacks, throttles + // busy waits, etc). Assert( FTriggeredStep( m_cseqMac - 1 ) || m_rgDiagInfo[ m_cseqMac - 1 ].cCallbacks == 0 ); Assert( FTriggeredStep( m_cseqMac - 1 ) || m_rgDiagInfo[ m_cseqMac - 1 ].cThrottled == 0 ); } @@ -1717,6 +1814,9 @@ void CIsamSequenceDiagLog::AddCallbackTime( const double secsCallback, const __i return; // see comments in Trigger() about this. 
} + // FUTURE-2016/10/30-BrettSh - Since engaging the floating point can burn a lot of CPU, I really should + // have just accumulated DHRTs and then converted it to seconds at the very end for the sprintf / stats + // accumulation. Applies to AddThrottleTime() as well. m_rgDiagInfo[m_cseqMac].cCallbacks += cCallbacks; m_rgDiagInfo[m_cseqMac].secInCallback += secsCallback; } @@ -1778,6 +1878,9 @@ __int64 CIsamSequenceDiagLog::UsecTimer( _In_ INT seqBegin, _In_ const INT seqEn return 0; } + Expected( FTriggeredSequence_( 0 ) ); // be odd to have not started sequence and ask for timings + Expected( seqEnd + 1 != m_cseqMax || FTriggeredSequence_( seqEnd ) ); + if ( !FValidSequence_( seqBegin ) || !FValidSequence_( seqEnd ) || seqBegin >= seqEnd || @@ -1793,9 +1896,10 @@ __int64 CIsamSequenceDiagLog::UsecTimer( _In_ INT seqBegin, _In_ const INT seqEn { seqBegin--; } - + Expected( seqBegin < seqEnd ); // this should be true unless we had a failure before the 2nd sequence (seq = 1). Let us see if it happens. + if ( !FTriggeredSequence_( seqBegin ) || - !FTriggeredSequence_( seqEnd ) ) + !FTriggeredSequence_( seqEnd ) ) { return 0; } @@ -2120,6 +2224,11 @@ void CIsamSequenceDiagLog::SprintTimings( _Out_writes_bytes_(cbTimeSeq) WCHAR * dckbPagefileUsagePeak || dckbPrivateUsage ) ) { + // FUTURE-2014/12/13-BrettSh - Jeez, I wish we had the kind of engine where I wasn't measuring + // in KB! But for now, we do, and also since we don't have heap bytes (which would be more useful + // in real bytes), all of these deltas are actually multiples of KB. + // FUTURE-2016/02/06-BrettSh - We could easily add some other interesting stats like reserved + // memory, differentiating mapped binary image (b/c DLL is shared), unique reference set, etc. 
OSStrCbFormatW( pwszCurr, cbCurrLeft, L" +M(C:%I64dK, Fs:%d, WS:%IdK # %IdK, PF:%IdK # %IdK, P:%I64dK)", dckbCacheMem, m_rgDiagInfo[seq].memstat.cPageFaultCount - m_rgDiagInfo[seqBefore].memstat.cPageFaultCount, @@ -2426,6 +2535,9 @@ VOID INST::SaveDBMSParams( DBMS_PARAM *pdbms_param ) VOID INST::RestoreDBMSParams( DBMS_PARAM *pdbms_param ) { + // FUTURE-2010/05/20-BrettSh - So this is a bit dicey, but it is probably ok, because + // we either tear down the instance right after restore, or in the case we use it, we + // probably have to have the right params anyway. m_plog->SetCSecLGFile( pdbms_param->le_lcsecLGFile ); } @@ -2671,6 +2783,10 @@ PM_ICF_PROC LTableClassNamesICFLPwszPpb; // Max characters in any table class name's suffix that we add automatically. These // strings are defined below. #define cchTCESuffixMax (10) +// ISSUE-2006/01/21-BrettSh - I think this is a localization issue? The normal perf counters +// names as picked up from esentprf.ini are in fact localized in Vista. So does this work? +// not quite sure, should be investigated. Note simply making these Unicode doesn't work, +// someone needs to investigate how to pull the strings from a localizable context. const WCHAR * const g_wszUnknown = L"_Unknown"; const WCHAR * const g_wszCatalog = L"_Catalog"; const WCHAR * const g_wszShadowCatalog = L"_ShadowCatalog"; @@ -2932,6 +3048,8 @@ INLINE VOID RUNINSTSetModeMultiInst() // we're in, while JET_paramMaxInstances // only keeps track of the max instances in multi- // instance mode) + // FUTURE-2013/10/22-BrettSh - This is too early and far away from where we're + // allocating g_rgpinst. g_cpinstMax = (ULONG)UlParam( JET_paramMaxInstances ); g_ifmpMax = g_cpinstMax * dbidMax + cfmpReserved; } @@ -2958,6 +3076,16 @@ LOCAL ERR ErrRUNINSTCheckAndSetOneInstMode() if ( RUNINSTGetMode() == runInstModeNoSet ) { Assert( g_cpinstInit == 0 ); + // FUTURE-2013/10/17-BrettSh - This is delicate code ... 
you see restore will + // allocate the instance (for single inst mode), BUT it's dangerous to leave + // ourselves in this state ... b/c if we fail out anywhere between here and + // restore getting into the beginning of ErrNewInst(), we leave ESE in this + // stuck state, where it thinks it is in one-inst mode, but there is no actual + // instance initialized. You can't JetTerm() the non-existing implicit instance + // to restore yourself to the no-mode state. It might be better to actually + // move all this code into ErrINSTSystemInit() but JetEnableMultiInstance() + // actually utilizes this separated state to force us to one way go into multi- + // inst mode. RUNINSTSetModeOneInst(); } else if ( RUNINSTGetMode() == runInstModeMultiInst ) @@ -2979,7 +3107,7 @@ LOCAL ERR ErrRUNINSTCheckOneInstMode() if ( RUNINSTGetMode() == runInstModeNoSet ) { Assert( g_cpinstInit == 0 ); - // SOMEONE here: I checked and I could only find 4 instances (all variants of + // BrettSh here: I checked and I could only find 4 instances (all variants of // JetRestore) of this called where it would expect / be OK with no mode being // set. So the callers expect no mode to be a failure (which they will fail // with once they call ErrFindPinst()). @@ -3079,12 +3207,23 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration DWORD cioT = 0; switch( rand() % 5 ) { + // FUTURE-2013/08/06-BrettSh - Actually we should try 1 and 2 and 3 and other low numbers + // just to search for bugs ... + //case 0: cioT = 1; break; + //case 0: cioT = 2; break; + //case 0: cioT = 3; break; + //case 0: cioT = 12; break; + //case 1: cioT = 36; break; + //case 2: cioT = 108; break; case 0: cioT = 324; break; case 1: cioT = 1024; break; case 2: cioT = 3072; break; case 3: cioT = 10000; break; case 4: cioT = 32764; break; } + // FUTURE-2013/08/26-BrettSh - Really should have an OSTrace() to indicate all interesting + // settings overrides. 
+ //wprintf( L"\t\tDefaulted JET_paramOutstandingIOMax = %d\n", cioT ); m_cioOutstandingMax = min( (ULONG)UlParam( m_pinst, JET_paramOutstandingIOMax ), cioT ); } #endif // DEBUG @@ -3138,7 +3277,7 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration // initialize this setting if ( m_permillageSmoothIo == dwMax ) { - // Exs: 999‰ = 99.9% Smooth, 990‰ = 99.0% Smooth, 900‰ = 90.0% Smooth. Debug default = 0.2% + // Exs: 999‰ = 99.9% Smooth, 990‰ = 99.0% Smooth, 900‰ = 90.0% Smooth. Debug default = 0.2% ULONG permillageSmoothIo = OnDebugOrRetail( 2, CDefaultFileSystemConfiguration::PermillageSmoothIo() ); if ( m_pinst ) @@ -3500,6 +3639,10 @@ ERR ErrNewInst( // initialize the system if we are creating the first instance // + // FUTURE-2013/10/21-BrettSh - I would like to see this get moved out of ErrNewInst() at + // some point as some of the global state setting is done with the runInstModeMultiInst, + // runInstModeNoSet, runInstOneInst outside / before this, and some is done in here where + // we seem to be predominantly caring about specific inst stuff. if ( 0 == g_cpinstInit ) { // OSUInit's done my misc APIs may not have been done with the correct global params, @@ -3518,6 +3661,11 @@ ERR ErrNewInst( // See if g_rgpinst still have space to hold the pinst. // + // FUTURE-2013/10/21-BrettSh - I would like to see this get moved out of ErrNewInst() at + // some point as this seems to be g_rgpinst list management being done here, where as much + // what else is done in here seems to be predominantly caring about specific inst stuff. + // BTW, we should move this bounds check, and the ipinst loop check for a conflicting name, + // and the loop trying to find an empty slot. if ( g_cpinstInit >= g_cpinstMax ) { Error( ErrERRCheck( JET_errTooManyInstances ) ); @@ -3635,6 +3783,7 @@ ERR ErrNewInst( // but if we're creating a _new_ DB, and we happen to be in the downgrade window the new DB would use engine // default (i.e. 
upgradeed) and then if downgrade happens, we couldn't attach it, so we actually sort of need // to set this back to an old / safe version. + // See also FUTURE-2018/07/25-BrettSh comment. pinst->m_rgparam[ JET_paramEngineFormatVersion ].Reset( pinst, JET_efvExchange2016Cu1Rtm | JET_efvAllowHigherPersistedFormat ); } } @@ -3824,10 +3973,14 @@ LOCAL ERR ErrFindPinst( JET_INSTANCE jinst, INST **ppinst, INT *pipinst = NULL ) case runInstModeOneInst: // find the only one instance, ignore the given instance // since the given one may be bogus + // FUTURE-2013/10/16-BrettSh - Why would it not always be + // slot [0]? This applies to other APIs as well, such as + // JetGet|SetSystemParameter(). for ( ipinst = 0; ipinst < g_cpinstMax; ipinst++ ) { if ( pinstNil != g_rgpinst[ ipinst ] ) { + // testing the FUTURE-2013/10/16-BrettSh comment. Assert( ipinst == 0 ); *ppinst = g_rgpinst[ ipinst ]; if ( pipinst ) @@ -3941,7 +4094,7 @@ class APICALL { protected: ERR m_err; - INT m_op; // 2014/11/03-SOMEONE - To make the change easier, we cache the op so that we don't have to modify every call site. + INT m_op; // 2014/11/03-UmairA - To make the change easier, we cache the op so that we don't have to modify every call site. INT m_opOuter; // This is the saved op of the "outer" JET API when we come into another JET API from a JET callback. IOREASONTERTIARY m_iortOuter; @@ -4490,6 +4643,13 @@ ERR INST::ErrAPIEnterForInit() ERR INST::ErrAPIEnterWithoutInit( const BOOL fAllowInitInProgress ) { ERR err; + // NTRAID#ESE-115-2013/06/11-BrettSh - This has a fundamental timing hole ... while + // it will work while the INST is in the _MIDDLE_ of init(w/ err) or term, near + // the end it is functionally broken b/c we're using a member variable of the + // INST * that is to be deallocated on Term or on a failure during Init. 
We can + // probably solve this by moving this m_cSessionInJetAPI into the g_rgpinst next + // to the INST *, and then give out the index to that array slot instead of the + // INST * we do today. LONG lOld = AtomicExchangeAdd( &m_cSessionInJetAPI, 1 ); if ( ( lOld & maskAPILocked ) && // API can't be locked, unless ... @@ -4610,6 +4770,9 @@ VOID INST::EnterCritInst() { g_critInst.Enter(); } VOID INST::LeaveCritInst() { // We should have consistent g_runInstMode and g_cpinstInit parameters at this point + // FUTURE-2007/11/05-BrettSh - I am dubious that it is a good idea to allow + // us to be in no mode and one mode w/ g_cpinstInit 0 or 1. Ideally, we'd make + // the state transition to onemode at the same time as allocating an inst ... Assert( ( runInstModeNoSet == g_runInstMode && 2 > g_cpinstInit ) || ( runInstModeOneInst == g_runInstMode && 2 > g_cpinstInit ) || ( runInstModeMultiInst == g_runInstMode ) ); @@ -4660,6 +4823,24 @@ ERR INST::ErrINSTSystemInit() Assert( 0 == g_cpinstInit ); Assert( g_rgpinst == NULL ); // or we'll be leaking memory. + // FUTURE-2013/10/22-BrettSh - I've flown too high, and got burned by trying + // to move this code from the RUNINSTSetModeOneInst()/RUNINSTSetModeMultiInst() + // area ... this runs afoul of some perfmon re-allocation protection in + // ErrOSUInit(). What should happen? See RUNINSTSetModeMultiInst(). + //switch( RUNINSTGetMode() ) + // { + //case runInstModeOneInst: + // g_cpinstMax = 1; + // g_ifmpMax = g_cpinstMax * dbidMax + cfmpReserved; + // break; + //case runInstModeMultiInst: + // g_cpinstMax = (ULONG)UlParam( JET_paramMaxInstances ); + // g_ifmpMax = g_cpinstMax * dbidMax + cfmpReserved; + // break; + //default: + // // No appropriate mode set before ErrIsamSystemInit()/ErrINSTSystemInit()! 
+ // EnforceSz( fFalse, "InvalidInstMode" ); + // } Alloc( g_rgpinst = new INST*[g_cpinstMax] ); memset( g_rgpinst, 0, sizeof(INST*) * g_cpinstMax ); @@ -5588,7 +5769,7 @@ ERR CJetParam::GetString( // UNICODE_COMPATIBILITY: // This is tricky, b/c JET used to just truncate the value, not return error if not enough buffer. - // However, I (SOMEONE) think that we should change the contract, because anyone who is + // However, I (Brett) think that we should change the contract, because anyone who is // getting a truncated string, is proably unknowingly failing in some logical way. // err = ErrOSStrCbCopyW( wszParam, cbParamMax, (WCHAR*)pjetparam->m_valueCurrent ); @@ -5980,7 +6161,7 @@ ERR CJetParam::IllegalClone( CJetParam* const pjetparamSrc, // 32kb pages: 8,150 // 16kb pages: 4,050 // -// 2017/11/15-SOMEONE - With prefix-compression on LID64, current chunk size still allows us to store 4 chunks + LVROOT on 1 page. +// 2017/11/15-UmairA - With prefix-compression on LID64, current chunk size still allows us to store 4 chunks + LVROOT on 1 page. // We can avoid changing the chunk size and still get optimal storage characteristics. // // (The chunk size on 8kb and 4kb pages stays the same to preserve @@ -6089,6 +6270,10 @@ GetCommitDefault( const CJetParam* const pjetparam, if ( pinst != pinstNil ) { + // FUTURE-2011/08/15-BrettSh - It is interesting and probably a poor choice that + // you have to specify both a pinst and a ppib because a ppib necessarily implies + // a specific pinst. Consider moving this if ( ppib ) out of the if ( pinst ) + // clause above. if ( ppib != ppibNil ) { ULONG ulActual; @@ -6102,6 +6287,8 @@ GetCommitDefault( const CJetParam* const pjetparam, } else { + // FUTURE-2011/08/31-BrettSh - Consider removing this and making it set the grbit + // in the actual INST's param table instead. 
*((JET_GRBIT*)pulParam) = pinst->m_grbitCommitDefault; } } @@ -6254,6 +6441,8 @@ SetCheckpointDepthMax( CJetParam* const pjetparam, ERR err = JET_errSuccess; Assert( pjetparam->m_paramid == JET_paramCheckpointDepthMax ); + // ISSUE-2012/04/11-BrettSh - Stupidly we can't set this b/c interop just passes an int arg to both params! + //Expected( wszParam == NULL ); Expected( ppib == ppibNil ); Call( CJetParam::SetInteger( pjetparam, pinst, ppib, ulParam, wszParam ) ); @@ -6723,7 +6912,7 @@ struct ConfigSetOverrideValue #define CO( paramid, value, flags ) { value, paramid, flags } -// It is important (cough, SOMEONE, cough, SOMEONE, cough SOMEONE) that we do not give into our desire to +// It is important (cough, Anil, cough, Jonathan, cough Ian) that we do not give into our desire to // easily control our clients params with our binary and define only configuration sets that can be // abstracted to a logically sensible configuration for our engine that is not a layering violation. @@ -6739,6 +6928,8 @@ const ConfigSetOverrideValue g_rgJetConfigRemoveQuotas[] = { // parameters. Ignoring the fact that many of these arguments did FORMERLY increase memory usage. }; +// FUTURE-2012/03/13-BrettSh - Some others that we might want to consider tuning ... esp. for the +// fact that ManagedStore runs with B Tree Defrag and DbScan ... though maybe they'll turn these off. /* NORMAL_PARAM( JET_paramDefragmentSequentialBTrees, CJetParam::typeBoolean, 0, 0, 0, 0, 0, -1, 1 ), @@ -6769,6 +6960,9 @@ const ConfigSetOverrideValue g_rgJetConfigRemoveQuotas[] = { const ConfigSetOverrideValue g_rgJetConfigLowMemory[] = { //CO( JET_paramEnableAdvanced, fFalse, 0x0 ), // pre-win8 we used to disable this on smallConfig, but it turned out to be a terrible idea + // FUTURE-2012/03/12-BrettSh - future site for breaking out the small config settings into their + // own table. These variables should be checked for consistency with those a 2nd time when that + // is done. 
// Global Component Control // @@ -6877,6 +7071,9 @@ const ConfigSetOverrideValue g_rgJetConfigSSDProfileIO[] = { //CO( JET_paramMaxCoalesceReadGapSize, 256*1024, 0x0 ), // we actually want read gapping (according to NT perf team) ... though in theory we should NOT need it with SSDs. Leaving it at default 256 KB, so we don't get carried away. Have not verified ourselves. CO( JET_paramMaxCoalesceWriteGapSize, 0, 0x0 ), // avoid overwriting pieces of the disk that do not need to be updated CO( JET_paramOutstandingIOMax, 16, 0x0 ), // do not need to be as aggressive for read IO max + // FUTURE-2012/03/13-BrettSh - Consider bringing this parameter back to hobble the write IO aggressiveness asymetrically + // compared to the read IO aggressiveness / JET_paramOutstandingIOMax. + //CO( JET_paramCheckpointIOMax, 1, 0x0 ), // not used right now CO( JET_paramPrereadIOMax, 5, 0x0 ), // should not need as much prereading for SSDs }; @@ -6889,6 +7086,15 @@ const ConfigSetOverrideValue g_rgJetConfigRunSilent[] = { // disables ETW tracing - which looks especially expensive now. }; +// FUTURE-2012/03/15-BrettSh - List of ideas for other configurations ... +// - Like LowMemory and MediumMemory, there is probably room for RunSilent differentiation above, +// like perhaps a RunQuiet, and RunVerbose or RunChatty or RunNoisy. Martin thinks RunStealth, +// RunSneak, and RunCloaked are good, and I'll add to that RunNinja is a good one! Honestly, +// it's like little boys are writing this code! +// - Another one might be like JET_configBenchmark or JET_configTopPerf or something that takes +// all the stops out and runs us in the fastest / heaviest weight mode possible. +// - Any others? +// #undef CO @@ -6978,6 +7184,9 @@ VOID SetJetConfigSet( INST * const pinst, _In_reads_(cConfigOverrides) const Con ulpFinal *= 2; } + // ISSUE-2012/03/17-BrettSh - We actually know the difference between when a client has + // set a param or not ... 
we could make the config sets skip parameters that the client has + // already set before setting the configuration parameter. // if pinst == NULL, then we're resetting parameters globally (not on a per-instance // basis), so we can reset this parameter now. @@ -7130,6 +7339,10 @@ SetConfiguration( CJetParam* const pjetparam, // if ( !( pinst != pinstNil && pjetparamT->FGlobal() ) ) { + // ISSUE-2012/03/12-BrettSh - Here (and the other 2 calls to Reset()) all + // can fail due to calling this at the wrong time, so the API has kind of + // a sucky contract in that some settings just may not be overridden with + // no feedback to the consumer. (void)pjetparamT->Reset( pinst, pjetparamT->m_valueDefault[ configLegacy & JET_configDefault ] ); } @@ -7182,6 +7395,10 @@ SetConfiguration( CJetParam* const pjetparam, if ( pinst == pinstNil || !pjetparamT->FGlobal() ) { + // FUTURE-2012/03/13-BrettSh - As soon as I can write a test to validate that I won't regress this, we'll + // take the riskier approach to move to the g_rgJetConfigLowMemory, g_rgJetConfigRemoveQuotas[] entries + // for configLegacySmall and remove the checks for configLegacy != configLegacySmall above and the extra + // array slot in the sys param definition. if ( pjetparamT->m_valueDefault[ configLegacySmall ] != pjetparamT->m_valueDefault[ configLegacyLegacy ] ) { (void)pjetparamT->Reset( pinst, pjetparamT->m_valueDefault[configLegacySmall] ); @@ -7198,6 +7415,7 @@ SetConfiguration( CJetParam* const pjetparam, } if ( configSet & JET_configUnthrottledMemory ) { + // FUTURE-2013/05/01-BrettSh - Fix JET_configDynamicHighMemory to do something. 
configSet &= ~JET_configUnthrottledMemory; } if ( configSet & JET_configSSDProfileIO ) @@ -7208,6 +7426,7 @@ SetConfiguration( CJetParam* const pjetparam, // after memory configs b/c relies on paramCacheSizeMax and most likely battery is the top-most concern if ( configSet & JET_configLowPower ) { + // FUTURE-2012/03/12-BrettSh - There are lots of things to consider, log file size, log buffers, etc. configSet &= ~JET_configLowPower; } // since these are specifically pools that don't affect memory size, it's fine to have near end @@ -7224,6 +7443,7 @@ SetConfiguration( CJetParam* const pjetparam, } if ( configSet & JET_configHighConcurrencyScaling ) { + // FUTURE-2013/05/01-BrettSh - Fix JET_configHighConcurrencyScaling to do something. configSet &= ~JET_configHighConcurrencyScaling; } @@ -7234,6 +7454,9 @@ SetConfiguration( CJetParam* const pjetparam, return ErrERRCheck( JET_errInvalidParameter ); } + // FUTURE-2012/03/13-BrettSh - It is worth noting that if we ever set any paths that get recalculated + // in here via FixDefaultSystemParameters(), we'll need to add another call to FixDefaultSystemParameters + // here. return err; } @@ -7462,7 +7685,28 @@ ErrSetCacheClosedTables( CJetParam* const pjetparam, value2 ) \ NORMAL_PARAMEX( #paramid, paramid, type, fAdvanced, fGlobal, fMayNotWriteAfterGlobalInit, fMayNotWriteAfterInstanceInit, rangeLow, rangeHigh, value, value2 ) +// ISSUE-2012/02/02-martinc: g_rgparamRaw *should* be on a read-only data page. At the +// time of this writing, it is for x86, but not amd64 or ARM. The latter platforms contain +// a dynamic initializer. Because we need to pay the COW penalty for most of our platforms +// regardless of whether it's 'const' or not, it is simpler to pay the COW penalty on +// all platforms for simpler code. +// +// If the g_rgparamRaw array can be made 'const' and eliminate the dynamic initializer, then +// we will want to keep it on a readonly page and copy it to some heap memory to avoid the +// COW operations. 
+// +// The following are known factors causing a dynamic initializer: +// -Presence of a destructor (~JetParam). +// -Initialization involving { a ? b : c }. (e.g. PFNGET_OF_TYPE). The compiler does not +// evaluate these ternary expressions at compile time. +// +// Even after eliminating these two factors, the dynamic initializer was still instantiated. +// ISSUE-2012/02/13-martinc. There is a bug in the Windows 8 OACR that causes the cl.exe wrapper +// to run away consuming gigabytes of memory, and thus causing a DoS on the build machine. +// The bug has been fixed in another branch, but may take a while to make it to our branch. +// So this #ifdef _PREFAST_ workaround does not have to be in here for long. +// Windows 8 Bugs:694176. // UNICODE_UNDONE_DEFERRED: Technically don't need to do this, but I noticed that if you set a default string value to ASCII string, there is no compile error, that means it is ripe for a bug. @@ -7544,8 +7788,12 @@ const size_t g_cparam = _countof( g_rgparamRaw ); C_ASSERT( sizeof( JetParam ) == sizeof( CJetParam ) ); // g_grparamRaw is an array of JetParam, and g_rgparam is an array of the child class CJetParam. +// FUTURE-2012/02/06-martinc. g_gparam may one day be dynamically allocated +// and copied from g_rgparamRaw. CJetParam* const g_rgparam = (CJetParam*) &g_rgparamRaw[ 0 ]; +// FUTURE-2015/03/04-AndyGo - Only remaining reliance is in ErrITSetConstants calling FixDefaultSystemParameters. +// Otherwise, FixDefaultSystemParameters is protected by g_critInst LOCAL CCriticalSection g_critSysParamFixup( CLockBasicInfo( CSyncBasicInfo( "g_critSysParamFixup" ), rankSysParamFixup, 0 ) ); // Some System Parameters need a dynamic default that can't be established at compile time. @@ -7595,6 +7843,10 @@ VOID FixDefaultSystemParameters() // Configure Default Path Variables // + // ISSUE-2013/11/06-BrettSh - Didn't think you could create a pfsapi before ErrOSUInit() ... someone + // should investigate that. If not valid, fix. 
IF valid, then perhaps change all the other OSU + // users (JetGetDatabaseFileInfo, JetRemoveLogFile, etc) off the OSU so we don't have that perfmon + // allocation pre-init problems. Call( ErrOSFSCreate( g_pfsconfigGlobal, &pfsapi ) ); Call( pfsapi->ErrPathFolderDefault( rgwchDefaultPath, _countof( rgwchDefaultPath ), &fIsDefaultDirectory ) ); @@ -7846,6 +8098,14 @@ ERR ErrSysParamLoadDefaults( const BOOL fHasCritInst, INST * pinst, CConfigStore Assert( !fHasCritInst || INST::FOwnerCritInst() ); + // FUTURE-2013/11/02-BrettSh - It occurred to me a bit late, that it is possibly more + // efficient to use some form of registry enumeration API ... rather than test get from + // registry almost 200 values. Of course then you'd have to search through all the + // strings for the param table looking for a match! :P Blech. + // ISSUE-2013/11/06-BrettSh - Got a build error by declaring cparam as "const cparam" in + // the arg list above (which I didn't even know you could do, must be assuming int), and + // then not having a cast to (size_t) here ... BUT only failed build in focus, NOT on + // local box. Debug that, and fix local build to match focus build. Seriously what the? for ( size_t iparamid = 0; iparamid < (size_t)cparam; iparamid++ ) { Assert( prgparam[iparamid].m_paramid == iparamid ); // sanity check @@ -8054,9 +8314,9 @@ class InitCallbackWrapper }; // -// In what can only be described as a new-wave fusion of Romanian SOMEONE-escu-esc CAuto -// class model sprinkled with SOMEONE-like char/wchar agnostic _T-esk templating gloss, to -// create a not quite break the debugger SOMEONEian templated auto class for converting +// In what can only be described as a new-wave fusion of Romanian Andrei-escu-esc CAuto +// class model sprinkled with JLiem-like char/wchar agnostic _T-esk templating gloss, to +// create a not quite break the debugger Goodsellian templated auto class for converting // V1 index create structures to type V2 index create structures. 
// template< class JET_INDEXCREATE_T, class JET_INDEXCREATE2_T > @@ -8158,9 +8418,9 @@ CAutoINDEXCREATE1To2_T< JET_INDEXCREATE_T, JET_INDEXCREATE2_T >::~CAutoINDEXCREA //===================================================== // -// In what can only be described as a new-wave fusion of Romanian SOMEONE-escu-esc CAuto -// class model sprinkled with SOMEONE-like char/wchar agnostic _T-esk templating gloss, to -// create a not quite break the debugger SOMEONEian templated auto class for converting +// In what can only be described as a new-wave fusion of Romanian Andrei-escu-esc CAuto +// class model sprinkled with JLiem-like char/wchar agnostic _T-esk templating gloss, to +// create a not quite break the debugger Goodsellian templated auto class for converting // V2 index create structures to type V3 index create structures. // template< class JET_INDEXCREATE2_T, class JET_INDEXCREATE3_T > @@ -12212,6 +12472,9 @@ LOCAL JET_ERR JetGetVersionEx( _In_ JET_SESID sesid, _Out_ ULONG *pVersion ) // 27-31 Image Major // // + // ISSUE-2009/08/03-KetanD - Need to revise this before we get to Windows 16. + // ISSUE-2009/08/03-KetanD - On Windows, we are relying on the build number not resetting for + // service packs. If that changes, our version ULONG moves backwards. #ifdef ESENT // assert no aliasing (i.e. overlap) of version information @@ -12225,6 +12488,9 @@ LOCAL JET_ERR JetGetVersionEx( _In_ JET_SESID sesid, _Out_ ULONG *pVersion ) Assert( DwUtilSystemServicePackNumber() < 1 << 8 ); + // ISSUE-2009/08/03-KetanD - Note that we had not been using BuildNumberMinor or ImageVersionMinor for Windows since 9/2004. + // When we revise this API, we can try having a build number again. The lack of ImageVersionMinor + // was unintentional. Using service packs for low bits is questionable. 
const ULONG ulVersion = ( ( DwUtilImageVersionMajor() & 0xF ) << 28 ) + ( ( DwUtilImageVersionMinor() & 0xF ) << 24 ) + @@ -12291,6 +12557,11 @@ LOCAL JET_ERR JetGetSystemParameterEx( _In_ JET_INSTANCE instance, _In_ JET_SESID sesid, _In_ ULONG paramid, + // FUTURE-2005/06/09-BrettSh - This changing of SAL is for a SINGLE JET param that I know + // of, JET_paramErrorToString, where it uses an OUT param as basically an IN param!! Guh. + // We'd be best to put this back, and make the clients use some other function for getting + // errors. + // __out_bcount_opt(cbMax) JET_API_PTR *plParam, __out_opt JET_API_PTR * plParam, _Out_opt_z_bytecap_( cbMax ) JET_PWSTR wszParam, _In_ ULONG cbMax ) @@ -12382,6 +12653,10 @@ LOCAL JET_ERR JetGetSystemParameterExA( _In_ JET_INSTANCE instance, _In_ JET_SESID sesid, _In_ ULONG paramid, + // FUTURE-2005/06/09-BrettSh - This changing of SAL is for a SINGLE JET param that I know + // of, JET_paramErrorToString, where it uses an OUT param as basically an IN param!! + // We'd be best to put this back, and make the clients use some other function for getting + // errors. __out_opt JET_API_PTR * plParam, _Out_opt_z_bytecap_( cbMax ) JET_PSTR szParam, _In_ ULONG cbMax ) @@ -13869,6 +14144,13 @@ LOCAL JET_ERR JetGetPageInfoEx( || pgnoNull == pgno || pgnoMax == pgno ) { + // FUTURE-2006/08/23-JLiem - AndyGo suggested + // setting a flag in JET_PAGEINFO to indicate + // that this page is being interpreted as a + // trailer page (so the caller can verify + // that this is in fact the last physical + // page in the database) + // pagetype = databaseHeader; pgno = pgnoNull; } @@ -15015,6 +15297,9 @@ C_ASSERT( sizeof(JET_TABLECREATE2_W) != sizeof(JET_TABLECREATE3_A) ); LOCAL JET_ERR JetCreateTableColumnIndexEx( _In_ JET_SESID sesid, _In_ JET_DBID dbid, + // NTRAID#ESE-125-2014/06/18-BrettSh - Are you implementing JET_TABLECREATE6_A !?!?! Then please please + // please please split out the in and out args per the ESE bug cited. 
+ // ISSUE ISSUE / FUTURE FUTURE - NOTICE ME. __inout JET_TABLECREATE5_A * ptablecreate ) { JET_ERR err; @@ -15047,6 +15332,13 @@ LOCAL JET_ERR JetCreateTableColumnIndexEx( } else { + // FUTURE-2014/07/09-martinc;BrettSh - This is a particularly inefficient method of creating + // a table because of the forced deep copy. The problems preventing passing the original + // structure with JET_bitTableCreateImmutableStructure are: + // - The list of columnids are updated in rgcolumns, that are consumed later in ErrFILEICreateIndexes (columnidT = pcolcreate->columnid;). + // - (Solvable) Closing the exclusively-opened tableid. + // - (Solvable) Not writing the index id's. + // - Unknown further issues beyond ... ;-) CAutoTABLECREATE5To5_T< JET_TABLECREATE5_A, JET_TABLECREATE5_A, CAutoINDEXCREATE3To3_T< JET_INDEXCREATE3_A, JET_INDEXCREATE3_A > > tablecreate; Call( tablecreate.ErrSet( ptablecreate ) ); @@ -17024,6 +17316,9 @@ LOCAL JET_ERR JetBackupInstanceEx( if ( apicall.FEnter( instance ) ) { + // ISSUE-2009/12/05-BrettSh - I don't really like this, because the backup callback + // doesn't have to match the Init callback. The backup even has a JET_SESID that we + // could pass to the backup callback. InitCallbackWrapper initCallbackWrapper(pfnStatus); apicall.LeaveAfterCall( apicall.Pinst()->m_fBackupAllowed ? ErrIsamBackup( (JET_INSTANCE)apicall.Pinst(), wszBackupPath, grbit, InitCallbackWrapper::PfnWrapper, &initCallbackWrapper ) : @@ -17101,6 +17396,9 @@ JET_ERR JET_API JetRestoreA( _In_ JET_PCSTR szSource, __in_opt JET_PFNSTATUS Assert( fInitd == ( g_rgpinst != NULL ) ); + // ISSUE-2013/10/15-BrettSh - I'm not convinced this is safe, we definitely could have someone + // term at the same time, and a bad race and AV here. I don't think we should spin too many + // cycles on a concurrent race condition, that could cause other AVs as well. err = JetRestoreInstanceA( g_rgpinst ? (JET_INSTANCE)g_rgpinst[0] : NULL, szSource, NULL, pfn ); // I am not sure this holds ... 
@@ -19861,7 +20159,7 @@ LOCAL JET_ERR JetDBUtilitiesEx( JET_DBUTIL_W *pdbutilW ) // With the introduction of restartable seeds, some long-held assumptions and checks about the backup set and its // required range may not hold anymore. Therefore, this JET_bitDBUtilOptionSkipMinLogChecksUpdateHeader option was // created to handle that case. - // I (SOMEONE) think we shouldn't even have the bit and just always pass fTrue to the function below to signal + // I (ADaCosta) think we shouldn't even have the bit and just always pass fTrue to the function below to signal // that the backup set is being handled externally, regardless of whether or not it was restarted. Perhaps we // should do that in the future and deprecate exposing the bit in the first place. // @@ -20984,10 +21282,19 @@ ERR ErrTermComplete( JET_INSTANCE instance, JET_GRBIT grbit ) { pinst->m_pbackup->BKLockBackup(); + // FUTURE-2006/03/11-BrettSh - may be concurrency hole, b/c logutil.cxx doesn't check + // m_fBackupAllowed inside this crit section? Check thoroughly ... pinst->m_pbackup->BKUnlockBackup(); // Lazy way to wait until the snapshot terminates / aborts and comes back to us, letting // the term thread go on normally. + // FUTURE-2006/06/10-BrettSh - We should signal the freeze-thaw thread that we can + // abort and cleanup the snapshot backup immediately. For some reason though this + // all finishes fairly quickly so it seems not needed? Not sure I understand why. + // In theory it shouldn't be that hard to just call snapshot abort if this is taking + // too long for anyone? 
+ // + // Exchange12 138101: Yield to VSS writer thread to avoid deadlock while( pinst->m_pOSSnapshotSession != NULL ) { CESESnapshotSession::SnapshotCritLeave(); @@ -21222,6 +21529,9 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( WCHAR wszParamName[100]; OSStrCbFormatW( wszParamName, sizeof(wszParamName), L"%hs", g_rgparam[iparamid].m_szParamName ); const WCHAR * rgwszT[] = { wszParamName }; + // FUTURE-2013/11/20-BrettSh - It would be cool if we could generate a string for the identity + // of the _other_ instance that is _most likely_ currently initialized so that we could identify + // the conflicting service. UtilReportEvent( eventWarning, GENERAL_CATEGORY, GLOBAL_SYSTEM_PARAMETER_NOT_SET_PREVIOUSLY_MISMATCH_ID, @@ -21234,6 +21544,13 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( IBitmapAPI::ERR errBitmap = fbm.ErrSet( iparamid, fTrue ); Assert( errBitmap == IBitmapAPI::ERR::errSuccess ); + // ISSUE-2013/11/15-BrettSh comment we just have to load reg defaults + // here but in a special way that says we're only interested in the set property ... and to + // check the equality below ... I think. Maybe we can just load this specific param and see if + // the value matches? Think about how this integrates w/ people setting paramConfigStoreSpec + // itself! Ugh ... that seems like it would break, but I think it works? Find the test. OHHH, + // I see we won't set it the 2nd time, so if the reg values have changed, we'll ignore it. Log + // an event or fail out or both if they don't match? switch( g_rgparam[iparamid].Type_() ) { @@ -21270,6 +21587,9 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( WCHAR wszParamName[100]; OSStrCbFormatW( wszParamName, sizeof(wszParamName), L"%hs", g_rgparam[iparamid].m_szParamName ); const WCHAR * rgwszT[] = { wszParamName }; + // FUTURE-2013/11/20-BrettSh - It would be cool if we could generate a string for the identity + // of the _other_ instance that is _most likely_ currently initialized so that we could identify + // the conflicting service. 
UtilReportEvent( eventWarning, GENERAL_CATEGORY, GLOBAL_SYSTEM_PARAMETER_MISMATCH_ID, @@ -21299,6 +21619,9 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( // Since the registry defaults _appear written_, it can seem like the // two sets disagree. We will assume that if they set the same value // that they got the same set of params. This is not the safest bet. + // FUTURE-2013/11/15-BrettSh - Make this fail depending upon if the + // actual registry parameters values have changed. A sort of deep + // read of the fact. !g_rgparam[iparamid].m_fRegDefault ) { Assert( fOriginallySet ); @@ -21306,6 +21629,9 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( WCHAR wszParamName[100]; OSStrCbFormatW( wszParamName, sizeof(wszParamName), L"%hs", g_rgparam[iparamid].m_szParamName ); const WCHAR * rgwszT[] = { wszParamName }; + // FUTURE-2013/11/20-BrettSh - It would be cool if we could generate a string for the identity + // of the _other_ instance that is _most likely_ currently initialized so that we could identify + // the conflicting service. 
UtilReportEvent( eventWarning, GENERAL_CATEGORY, GLOBAL_SYSTEM_PARAMETER_SET_PREVIOUSLY_MISMATCH_ID, @@ -21490,10 +21816,10 @@ LOCAL JET_ERR JetInitEx( const ULONG cbTimingResourceDataSequence = pinst->m_isdlInit.CbSprintTimings(); WCHAR * wszTimingResourceDataSequence = (WCHAR *)_alloca( cbTimingResourceDataSequence ); pinst->m_isdlInit.SprintTimings( wszTimingResourceDataSequence, cbTimingResourceDataSequence ); - const __int64 secsInit = pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000; // convert to seconds - WCHAR wszSeconds[16]; + const double secsInit = (double)pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000.0; // convert to seconds + WCHAR wszSeconds[30]; WCHAR wszInstId[16]; - OSStrCbFormatW( wszSeconds, sizeof(wszSeconds), L"%I64d", secsInit ); + OSStrCbFormatW( wszSeconds, sizeof(wszSeconds), L"%.3f", secsInit ); OSStrCbFormatW( wszInstId, sizeof(wszInstId), L"%d", IpinstFromPinst( pinst ) ); const WCHAR * rgszT[4] = { wszInstId, wszSeconds, wszTimingResourceDataSequence, wszAdditionalFixedData }; @@ -21530,6 +21856,77 @@ LOCAL JET_ERR JetInitEx( { const WCHAR* wszInstDisplayName = ( pinst != NULL && pinst->m_wszDisplayName != NULL ? pinst->m_wszDisplayName : L"_unknown_" ); OSDiagTrackInit( wszInstDisplayName, pinst->m_plog->QwSignLogHash(), err ); + + // avoiding quick and dirty non-localized insert text on windows +#ifdef ENABLE_MICROSOFT_MANAGED_DATACENTER_LEVEL_OPTICS + + pinst->m_isdlInit.Trigger( eInitDone ); + const double secsInit2 = (double)pinst->m_isdlInit.UsecTimer( eSequenceStart, eInitDone ) / 1000000.0; // convert to seconds + WCHAR wszSeconds2[30]; + OSStrCbFormatW( wszSeconds2, sizeof(wszSeconds2), L"%.3f", secsInit2 ); + + WCHAR wszErrorState[120]; + JET_ERRCAT errcatMostSpecific = JET_errcatUnknown; + (void)ErrERRLookupErrorCategory( err, &errcatMostSpecific ); + if ( PefLastThrow() && err == PefLastThrow()->Err() ) + { + PERSISTED // for optics "(JET_errcat: 10)", etc. 
see Exch \ EseEventCategorized.cs. + OSStrCbFormatW( wszErrorState, sizeof(wszErrorState), L"%d (JET_errcat: %d) (src: %hs:%d)", err, errcatMostSpecific, SzSourceFileName( PefLastThrow()->SzFile() ), PefLastThrow()->UlLine() ); + } + else + { + PERSISTED // for optics "(JET_errcat: 10)", etc. see Exch \ EseEventCategorized.cs. + OSStrCbFormatW( wszErrorState, sizeof(wszErrorState), L"%d (JET_errcat: %d)", err, errcatMostSpecific ); + } + + WCHAR wszFailingMode[2] = { WchReportInstState( pinst ), L'\0' }; + + WCHAR wszFailingAddress[60]; + // The normal way of detecting recovery \ redo via: + // plog->FRecovering() && plog->FRecoveringMode() == fRecoveringRedo + // is controlled and cleaned up by this point even on an error. However, fortunately + // the pinst->m_perfstatusEvent mode is one way during init, and not reset until next + // call to JetInit() so we use this method for determining what mode we reached. + const BOOL fRedo = pinst->m_perfstatusEvent == perfStatusRecoveryRedo; + const BOOL fUndo = pinst->m_perfstatusEvent == perfStatusRecoveryUndo; + const BOOL fDo = pinst->m_perfstatusEvent == perfStatusRuntime; + // Normal method of getting lgposRedo (plog->LgposLGLogTipNoLock()) won't work for + // the same reason the regular mode computation, computes it wrong above. But the + // actual lgpos we want is in m_lgposRedo, so use special function to fetch it. + LGPOS lgposFailed = !fUndo ? // just in case, we treat everything besides undo as redo. + pinst->m_plog->LgposDiagnosticRedoFailedAddress() : + pinst->m_plog->LgposLGLogTipNoLock(); // undo address comes from live lgpos tip. + // Can imagine actually sticking other pieces of address in here, like the pgno the LR was + // referencing, or even logical descriptions like "DbfilehdrReadErr" or something. + OSStrCbFormatW( wszFailingAddress, sizeof( wszFailingAddress ), + L"lgpos%hs:%08x:%04x:%04x", + fRedo ? "Redo" : + ( fUndo ? "Undo" : + ( fDo ? 
"RedoOld" : + "Redo-Unconfirmed" ) ), + lgposFailed.lGeneration, lgposFailed.isec, lgposFailed.ib ); + + WCHAR wszHaPublishingFacts[300]; + PERSISTED // for optics "Verbose: 1", "FI Tags Published: 0x", and "FiCorruptionTag " / "FiLogLogicallyInconsistent ". see Exch \ EseEventCategorized.cs, Exch \ EseDatabaseMonitoringContext.cs + (void)ErrOSStrCbFormatW( wszHaPublishingFacts, sizeof( wszHaPublishingFacts ), L"Verbose: %d, FI Tags Published: 0x%x ( %hs%hs%hs)", + !!pinst->m_isdlInit.FTriggeredStep( eInitLogRecoverySilentRedoDone ), + pinst->m_grbitHaFailureTags, +#if defined( USE_HAPUBLISH_API ) + ( pinst->m_grbitHaFailureTags & bitHaPublishedCorruptionTag ) ? "FiCorruptionTag " : "", + ( pinst->m_grbitHaFailureTags & bitHaPublishedIoHardTag ) ? "FiIoHardTag " : "", + ( pinst->m_grbitHaFailureTags & bitHaPublishedLogLogicallyInconsistentTag ) ? "FiLogLogicallyInconsistent " : "" +#else + "", "", "" +#endif + ); + + const WCHAR * rgszFailT[6] = { wszInstDisplayName, wszErrorState, wszSeconds2, wszFailingMode, wszFailingAddress, wszHaPublishingFacts }; + + UtilReportEvent( eventError, GENERAL_CATEGORY, START_INSTANCE_FAILED_ID, _countof( rgszFailT ), rgszFailT, 0, NULL, pinst ); + + // Also to avoid event wrap, report failures in JetInit() to .IRS.RAW + DumpFailedInitToIrsRaw( pinst, wszInstDisplayName, wszErrorState, wszSeconds2, wszFailingMode, wszFailingAddress, wszHaPublishingFacts ); +#endif } // if instance allocated in this function call @@ -22306,6 +22703,9 @@ JET_ERR JET_API JetTerm2( _In_ JET_INSTANCE instance, _In_ JET_GRBIT grbit ) JET_TRY( opTerm, JetTermEx( instance, grbit ) ); } +// FUTURE-2012/04/10-BrettSh - The new version came in too late to risk the existing API (says +// Alex) so we'll make the v1 JET API call this and v2 will call the new JetStopServiceInstanceEx() +// API. In Win9 we can move the v1 to the v2 API with the JET_bitStopServiceAll. 
JET_ERR JET_API JetStopServiceInstanceExOld( _In_ JET_INSTANCE instance ) { ERR err; @@ -22313,6 +22713,8 @@ JET_ERR JET_API JetStopServiceInstanceExOld( _In_ JET_INSTANCE instance ) OSTrace( JET_tracetagAPI, OSFormat( "Start %s(0x%Ix)", __FUNCTION__, instance ) ); + // ISSUE-2013/10/16-BrettSh - We didn't do APICALL_INST::FEnter(), nor are we in + // INST::FOwnerCritInst() ... so I don't think this is actually safe. CallR( ErrFindPinst( instance, &pinst ) ); // Halt OLD for this instance @@ -22353,6 +22755,8 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G // Validate and retrieve args // + // ISSUE-2013/10/16-BrettSh - We didn't do APICALL_INST::FEnter(), nor are we in + // INST::FOwnerCritInst() ... so I don't think this is actually safe. CallR( ErrFindPinst( instance, &pinst ) ); const JET_GRBIT bitStopServiceAllInternal = 0x1; @@ -22418,6 +22822,8 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G pinst->m_fCheckpointQuiesce = fFalse; + // FUTURE-2012/04/21-BrettSh - Should we reject this if ( 0 == ( pinst->m_grbitStopped & JET_bitStopServiceQuiesceCaches ) ) ... + // it means they asked for a service to be resumed that wasn't stopped in the first place? Well the contract is fulfilled. // We don't need to call ErrIOUpdateCheckpoints() like we do for quiesce, because it // wouldn't do anything ... so we'll let the user drive the checkpoint back up from @@ -22506,6 +22912,8 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G { // Halt OLD for this instance + // FUTURE-2012/04/10-BrettSh - These should all in the fullness of time move under + // the JET_bitStopServiceBackgroundUserTasks bit, once they are all restartable. 
DBMScanStopAllScansForInst( pinst ); OLDTermInst( pinst ); OLD2TermInst( pinst ); @@ -22516,10 +22924,18 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G //OnDebug( grbitCheck &= ~bitStopServiceAllInternal ); } + // FUTURE-2012/04/10-BrettSh - This method is essentially Jetterm broken up piece-meal, + // and I think I would like to see JetTerm() implemented as each piece of these done + // independently. + // FUTURE-2012/04/10-BrettSh - It would be good to implement JetStopBackupInstanceEx as + // another grbit here. if ( grbit & JET_bitStopServiceBackgroundUserTasks ) { // Halt OLDv2/B+ Tree defrag for this instance + // ISSUE-2012/04/18-BrettSh - It would be better to suspend in-progress B+ tree defrags + // as well, BUT the way Exchange will use this (as it will be suspended _most_ of the + // time until the maintenance window), then we're unlikely to need this. FMP::EnterFMPPoolAsWriter(); FMP * pfmpCurr = NULL; if ( pinst && pinst->m_fJetInitialized ) @@ -22653,6 +23069,8 @@ LOCAL JET_ERR JetStopBackupInstanceEx( _In_ JET_INSTANCE instance ) OSTrace( JET_tracetagAPI, OSFormat( "Start %s(0x%Ix)", __FUNCTION__, instance ) ); + // ISSUE-2013/10/16-BrettSh - We didn't do APICALL_INST::FEnter(), nor are we in + // INST::FOwnerCritInst() ... so I don't think this is actually safe. 
CallR( ErrFindPinst( instance, &pinst ) ); if ( pinst->m_plog ) @@ -23387,6 +23805,9 @@ JET_ERR ErrTESTHOOKAlterDatabaseFileHeader( const JET_TESTHOOKALTERDBFILEHDR * c Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapiDatabase, JET_filetypeDatabase, (BYTE*)pdbfilehdr, (DWORD)g_cbPageMax, (LONG)OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), urhfReadOnly|urhfNoFailOnPageMismatch, &cbPageSize, &shs ) ); Call( CFlushMapForUnattachedDb::ErrGetPersistedFlushMapOrNullObjectIfRuntime( palterdbfilehdr->szDatabase, pdbfilehdr, pinstNil, &pfm ) ); + // FUTURE-2016/03/16-BrettSh - Be better if we kept the flush map up to date, but not critical to this + // feature, so dropping. + //Alloc( pfm ); // Must be after ErrUtilReadShadowedHeader() so we have page size. if ( ( palterdbfilehdr->ibField + palterdbfilehdr->cbField ) > cbPageSize ) @@ -23470,10 +23891,16 @@ JET_ERR JET_API JetTestHook( } break; + // FUTURE-2010/01/02-BrettSh - Try to integrate this testing method to the other and + // bring convergence to how we trigger unit tests. Probably need to move ErrOSUInit() + // above to make that happen. case opTestHookUnitTests2: { const JET_TESTHOOKUNITTEST2* const pParams = reinterpret_cast( pv ); + // ISSUE-2010/01/25-BrettSh - In one of fugliest API decisions ever, the + // JET_dbidNil (JET_DBIDs which get casted to IFMPs internally) is different + // from ifmpNil. const INT failures = JetUnitTest::RunTests( pParams->szTestName, pParams->dbidTestOn == JET_dbidNil ? 
ifmpNil : (IFMP)pParams->dbidTestOn ); if( failures > 0 ) @@ -23504,7 +23931,7 @@ JET_ERR JET_API JetTestHook( { if ( pParams->type != JET_TestInjectFault || pParams->grbit != JET_bitInjectionProbabilityPct || - pParams->ulProbability != 5 /* b/c that's what g_bflruk is using, move along SOMEONE */ ) + pParams->ulProbability != 5 /* b/c that's what g_bflruk is using, move along Alex */ ) { // Call( ErrERRCheck( JET_errInvalidParameter ) ); @@ -23652,6 +24079,10 @@ JET_ERR JET_API JetTestHook( const LGPOS lgposNewest = pinst->m_plog->LgposLGLogTipNoLock(); const __int64 cbCheckpointDepth = (__int64)pinst->m_plog->CbLGOffsetLgposForOB0( lgposNewest, lgposCheckpoint ); + // ISSUE-2012/04/18-BrettSh - I've gotten -8 out of this calculation! This is because the + // lgposTip is at beginning of the last LR pushed into the log buffer, not after it. So + // this should just be calculated as zero. We should fix this in log if possible, this is + // just silly. Assert( cbCheckpointDepth > -( 4096 * 64 * 1024 ) /* new 4k-segment-based max log file size */ ); *((__int64*)pv) = max( cbCheckpointDepth, 0 ); @@ -23694,6 +24125,10 @@ JET_ERR JET_API JetTestHook( // This is only used for our resmgrenginetest.exe, and it at worst case loads 100k buffers, at // a 20 unique touches / sec rate ... this is 5 M ticks cache lifetime ... anything beyond that // is a break of our target ... + // FUTURE-2012/09/13-BrettSh - This is assuming what resmgrenginetest needs, and so may break + // some day, or need to be moved to separate variables in JET_TESTHOOKTIMEINJECTION. For now + // I am ok with this. We also may need a parameter to force if we're going to walk over 2B or + // 4B ticks and exercise wrap. 
g_bflruk.SetTimeBar( 90 * 60 * 1000 /* lifetime = 90 min | 5,400,000 */, pthtimeinj->tickNow + 10 * 60 * 60 * 1000 /* +10 hrs */ ); } } @@ -23735,6 +24170,8 @@ JET_ERR JET_API JetTestHook( JET_mskTestHookCorruptSpecific ) & JET_bitTestHookCorruptLeaveChecksum ) ); + // FUTURE-2013/01/16-BrettSh - This may have to be restructured and split up when we allow + // the corrupting of say log files and such. if ( pcorrupt->grbit & JET_bitTestHookCorruptDatabaseFile ) { @@ -23763,6 +24200,7 @@ JET_ERR JET_API JetTestHook( if ( pcorrupt->grbit & JET_bitTestHookCorruptPageSingleFld ) { // Help the client out ... + // FUTURE-2013/01/17-BrettSh - Probably should let client explicitly set this. (void)FNegTestSet( fCorruptingPageLogically ); // Corrupt the Page, by tweaking a byte randomly in the page. @@ -23780,6 +24218,7 @@ JET_ERR JET_API JetTestHook( if ( pcorrupt->grbit & JET_bitTestHookCorruptPageRemoveNode ) { // Help the client out ... + // FUTURE-2013/01/17-BrettSh - Probably should let client explicitly set this. (void)FNegTestSet( fCorruptingPageLogically ); AssertSz( fFalse, "NYI - caused problems if there is only 1 line on the page." ); @@ -23794,6 +24233,7 @@ JET_ERR JET_API JetTestHook( Assert( pcorrupt->CorruptDatabasePageImage.iSubTarget ); // or else this would do nothing. // Help the client out ... + // FUTURE-2013/01/17-BrettSh - Probably should let client explicitly set this. (void)FNegTestSet( fCorruptingWithLostFlush ); diff --git a/dev/ese/src/ese/jettest.cxx b/dev/ese/src/ese/jettest.cxx index 90948cfc..4868a537 100644 --- a/dev/ese/src/ese/jettest.cxx +++ b/dev/ese/src/ese/jettest.cxx @@ -5,7 +5,9 @@ #include "PageSizeClean.hxx" -#ifdef ENABLE_JET_UNIT_TEST +#ifndef ENABLE_JET_UNIT_TEST +#error "File jettest.cxx is only supposed to be referenced / compiled in the unit test .vcxproj file." 
+#endif #include @@ -537,5 +539,7 @@ void JetTestEnforceSEHException::Cleanup() s_pThreadExcep = NULL; } -#endif // ENABLE_JET_UNIT_TEST - +BOOL FInEmbeddedUnitTest() +{ + return fTrue; +} diff --git a/dev/ese/src/ese/sysinit.cxx b/dev/ese/src/ese/sysinit.cxx index d14c0424..94272cd9 100644 --- a/dev/ese/src/ese/sysinit.cxx +++ b/dev/ese/src/ese/sysinit.cxx @@ -11,6 +11,16 @@ BOOL g_fDBGPerfOutput = fFalse; #endif /* DEBUG || PERFDUMP */ +// This is here, because jettest.cxx is only compiled in eselibwithtest.dll + +#ifndef ENABLE_JET_UNIT_TEST + +BOOL FInEmbeddedUnitTest() +{ + return fFalse; +} + +#endif // ENABLE_JET_UNIT_TEST #ifdef DEBUG diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 136250dd..06d33f97 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -3465,7 +3465,7 @@ INLINE ERR DBFILEHDR::DumpLite( CPRINTF* pcprintf, const char * const szNewLine, (*pcprintf)( "Revert Page Count: %u%s", (ULONG) le_ulRevertPageCount, szNewLine ); lgpos = le_lgposCommitBeforeRevert; - (*pcprintf)( "Last Commit Before Revert: (0x%X,%X,%X) ", lgpos.lGeneration, lgpos.isec, lgpos.ib ); + (*pcprintf)( "Last Commit Before Revert: (0x%X,%X,%X) %s", lgpos.lGeneration, lgpos.isec, lgpos.ib, szNewLine ); return JET_errSuccess; } @@ -5032,6 +5032,9 @@ public: BOOL m_fTermInProgress; BOOL m_fTermAbruptly; INST_STINIT m_fSTInit; + // Note: This status is not cleaned up if we fail in middle of Redo, Undo and this fact + // is used at end of JetInitEx() to log what mode we failed in. So do not reset this on + // error paths INT m_perfstatusEvent; // Redo, Undo, Runtime/Do-time, and Term. 
BOOL m_fBackupAllowed; @@ -5233,6 +5236,8 @@ public: CIsamSequenceDiagLog m_isdlInit; CIsamSequenceDiagLog m_isdlTerm; + volatile DWORD m_grbitHaFailureTags; + private: ERR ErrAPIAbandonEnter_( const LONG lOld ); diff --git a/dev/ese/src/inc/jettest.hxx b/dev/ese/src/inc/jettest.hxx index eebb9b53..92c623dc 100644 --- a/dev/ese/src/inc/jettest.hxx +++ b/dev/ese/src/inc/jettest.hxx @@ -356,5 +356,8 @@ void Test##component##test::Run_() #endif // !ENABLE_JET_UNIT_TEST +// defined in both ese.dll and eselibwithtest.dll, but returning different answer +BOOL FInEmbeddedUnitTest(); + #endif // JETTEST_HXX_INCLUDED diff --git a/dev/ese/src/inc/log.hxx b/dev/ese/src/inc/log.hxx index 27c685a0..8eaf7b02 100644 --- a/dev/ese/src/inc/log.hxx +++ b/dev/ese/src/inc/log.hxx @@ -1285,6 +1285,10 @@ public: BOOL FLastLRIsShutdown() const { return m_fLastLRIsShutdown; } LGPOS LgposShutDownMark() const { return m_lgposRedoShutDownMarkGlobal; } + // Note: Generally people should not be interested in lgposRedo, but failure event code has a need to + // know it directly. + LGPOS LgposDiagnosticRedoFailedAddress() const { return m_lgposRedo; } + VOID LGRRemoveFucb( FUCB * pfucb ); ERR ErrLGMostSignificantRecoveryWarning( void ); diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index cad22037..277bc378 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -665,7 +665,7 @@ const INT cfuncmap = sizeof( rgfuncmap ) / sizeof( EDBGFUNCMAP ); #define DUMPA( _struct ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" } -#define DUMPAA( _struct, addlargs ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" addlargs } +#define DUMPAA( _struct, addlargs ) { #_struct, &(CDUMPA<_struct>::instance), #_struct "
" addlargs } // ================================================================ @@ -691,20 +691,20 @@ LOCAL const CDUMPMAP rgcdumpmap[] = { DUMPA( LOG_STREAM ), DUMPA( LOG_WRITE_BUFFER ), DUMPA( VER ), - DUMPAA( MEMPOOL, "[|*] - =specified tag only, *=all tags" ), + DUMPAA( MEMPOOL, " [|*] - =specified tag only, *=all tags" ), DUMPA( SPLIT ), DUMPA( SPLITPATH ), DUMPA( MERGE ), DUMPA( MERGEPATH ), - DUMPA( DBFILEHDR ), + DUMPAA( DBFILEHDR, "|.|.disk" ), { "CDynamicHashTable", &(CDUMPA::instance), "CDynamicHashTable
" }, { "CApproximateIndex", &(CDUMPA::instance), "CApproximateIndex
" }, { "g_bflruk", &(CDUMPA::instance), "g_bflruk ese!g_bflruk" }, - DUMPA( COSDisk ), - DUMPA( COSFile ), + DUMPAA( COSDisk, "|.db|.edb" ), + DUMPAA( COSFile, "|.db|.edb" ), DUMPA( COSFileFind ), DUMPA( COSFileSystem ), - DUMPAA( IOREQ, "[dumpall|norunstats]" ), + DUMPAA( IOREQ, " [dumpall|norunstats]" ), { "PAGE", &(CDUMPA::instance), "PAGE [a|b|h|t|*|2|4|8|16|32] - a=alloc map, b=binary dump, h=header, t=tags, *=all, 2/4/8/16/32=pagesize" }, DUMPA( CResource ), @@ -16117,7 +16117,7 @@ DEBUG_EXT( EDBGDumpDBDiskPage ) dprintf( "Error: Could not read global FMP variables for ifmp = %d.\n", ifmp ); goto HandleError; } - else if ( pgno < 1 ) // UNDONE: don't currently support dumping page header + else if ( pgno < 1 ) { dprintf( "Error: Invalid pgno.\n" ); goto HandleError; @@ -16180,6 +16180,10 @@ DEBUG_EXT( EDBGDumpDBDiskPage ) { VirtualFree( pbPage, 0, MEM_RELEASE ); } + if ( NULL != posf ) + { + Unfetch( posf ); + } } @@ -19128,6 +19132,8 @@ VOID CDUMPA::Dump( { DBFILEHDR * pdbfilehdrDebuggee = NULL; DBFILEHDR * pdbfilehdr = NULL; + COSFile * posf = NULL; + const BOOL fReadFromDisk = ( argc >= 1 || 0 == _stricmp( argv[ 0 ], ".disk" ) ); const CHAR * const szMemDump = "mem"; @@ -19145,24 +19151,81 @@ VOID CDUMPA::Dump( return; } - if ( FFetchVariable( pdbfilehdrDebuggee, &pdbfilehdr ) ) + if ( fReadFromDisk ) { - const SIZE_T dwOffset = (BYTE *)pdbfilehdrDebuggee - (BYTE *)pdbfilehdr; + HANDLE hCurrentProcess; + ULONG64 ulCurrentProcess; + + const ULONG cbPage = Pdls()->CbPage(); - dprintf( "[DBFILEHDR] 0x%p bytes @ 0x%N\n", - QWORD( sizeof( DBFILEHDR ) ), - pdbfilehdrDebuggee ); - if ( fMemDump ) + if ( Pdls()->IfmpCurrent() == ifmpNil || Pdls()->IfmpCurrent() == 0 || + Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) == NULL || + cbPage == 0 ) { - (VOID)( pdbfilehdr->Dump( CPRINTFWDBG::PcprintfInstance(), dwOffset ) ); + dprintf( "Something went wrong. To use .disk argument, must have an implicit IFMP set with !ese .db. Or we couldn't load the Pfmp cache or cbPage. 
(%d, 0x%p, %d)\n", + Pdls()->IfmpCurrent(), ( Pdls()->IfmpCurrent() != 0 && Pdls()->IfmpCurrent() != ifmpNil ) ? Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) : NULL, cbPage ); + goto HandleError; } - else + + // UNDONE: currently assumes all databases are COSFile + // + if ( !FFetchVariable( (COSFile *)( Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) )->Pfapi(), &posf ) ) { - (VOID)( pdbfilehdr->DumpLite( CPRINTFWDBG::PcprintfInstance(), "\n", dwOffset ) ); + dprintf( "Error: Could not read COSFile at 0x%N for specified FMP.\n", ( Pdls()->PfmpCache( Pdls()->IfmpCurrent() ) )->Pfapi() ); + goto HandleError; } - - Unfetch( pdbfilehdr ); + + // VirtualAlloc() the buffer to ensure alignment + // + pdbfilehdr = (DBFILEHDR *)VirtualAlloc( NULL, cbPage, MEM_COMMIT, PAGE_READWRITE ); + if ( NULL == pdbfilehdr ) + { + dprintf( "Error: Could not allocate DBFILEHDR buffer (%d bytes) via VA !\n", cbPage ); + goto HandleError; + return; + } + + HRESULT hr = g_DebugSystemObjects->GetCurrentProcessHandle( &ulCurrentProcess ); + hCurrentProcess = (HANDLE) ulCurrentProcess; + if ( FAILED( hr ) ) + { + dprintf( "Failed to fetch process handle: %#x\n", hr ); + goto HandleError; + } + + if ( !FEDBGGetDbDiskPage( hCurrentProcess, posf->Handle(), (PGNO)-1 /* 0 would be shadow header */, (BYTE*)pdbfilehdr, cbPage ) ) + { + dprintf( "Failed to read from disk handle.\n" ); + goto HandleError; + } + dprintf( "Successfully read DBFILEHDR off the disk.\n" ); + if ( pdbfilehdr->le_filetype != JET_filetypeDatabase ) + { + dprintf( "\nWARNING: The read DBFILEHDR doesn't have JET_filetypeDatabase. Corruption or maybe EBC is enabled. Dumping contents anyways.\n\n" ); + } + } + else if ( !FFetchVariable( pdbfilehdrDebuggee, &pdbfilehdr ) ) + { + dprintf( "Failed to fetch DBFILEHDR memory from debugger process.\n" ); + goto HandleError; + } + + const SIZE_T dwOffset = fReadFromDisk ? 
0 : ( (BYTE *)pdbfilehdrDebuggee - (BYTE *)pdbfilehdr ); + + dprintf( "[DBFILEHDR] 0x%p bytes @ 0x%N\n", QWORD( sizeof( DBFILEHDR ) ), pdbfilehdrDebuggee ); + if ( fMemDump ) + { + (VOID)( pdbfilehdr->Dump( CPRINTFWDBG::PcprintfInstance(), dwOffset ) ); } + else + { + (VOID)( pdbfilehdr->DumpLite( CPRINTFWDBG::PcprintfInstance(), "\n", dwOffset ) ); + } + +HandleError: + + fReadFromDisk ? VirtualFree( pdbfilehdr, 0, MEM_RELEASE ) : Unfetch( pdbfilehdr ); + Unfetch( posf ); } // TrxidStack dumping From 02089753ced85b0f9fb54145b6c8532908d964ce Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Mon, 24 Oct 2022 17:41:24 +0000 Subject: [PATCH 076/102] ESE Block Cache: Async IO Worker hang (redo) Redo of !2247766 In prod, we are seeing occasional hangs in the async io worker state machine. There isn't enough state to debug exactly what happened. This change contains a rework of the state management of the cache's CRequest's IO. It improves the tracking of explicit state related to IO requests and completions. It also improves the contract with which these are used to perform IO in the multiple rounds we can do (request IO and finalization IO). This should improve debuggability and increase determinism. This also fixes a race with the completion mechanism based on CMeteredSection by replacing it with a simpler mechanism based on a count. The race could allow us to touch a released CRequest or even a released cache TLS. This change also contains a rework of the sync mechanism used to invoke async IO workers. After the previous change, I did catch a case where we clearly failed to signal the async io worker properly. This could have been the cause of the hang in prod because a missed signal could leave that async IO work stranded indefinitely. The new scheme uses an atomic update of a single word to control requests to the async io worker and when that worker needs to run. This scheme should be more reliable than the current scheme. 
It should also fix some performance issues that caused us to use too many worker pool threads. [Substrate:dade0d4b554f4928c889f7da2a61fe0c6e01d65f] --- dev/ese/src/ese/jetapi.cxx | 309 +------------- .../src/os/blockcache/_hashedlrukcache.hxx | 386 +++++++++++------- .../_hashedlrukcachethreadlocalstorage.hxx | 97 +++-- 3 files changed, 334 insertions(+), 458 deletions(-) diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 7c030a68..bf47b797 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -6,9 +6,6 @@ #include "errdata.hxx" #include "_bf.hxx" // for JetTestHook -// ISSUE-2014/09/12-BrettSh - I personally view this as a layering violation of stuff that should be -// contained in the OS layer, as it is specific to the Windows OS, and as used today actually only -// affects phone. #ifdef ESENT #include "slpolicylist.h" #else @@ -59,7 +56,7 @@ Instructions for adding a JET param 2) Modify sysparam.xml to create a defintion for the JET param 3) Run gengen.bat to update the source files. -- ------ ManagedEsent Layer ------- - A) Update manually. // [2014/08/09 - UmairA]: Support for auto-generated ManagedEsent params will be added soon. + A) Update manually. // [2014/08/09 - SOMEONE]: Support for auto-generated ManagedEsent params will be added soon. */ JET_ERR ErrERRLookupErrorCategory( @@ -1361,7 +1358,6 @@ VOID INST::TraceStationId( const TraceStationIdentificationReason tsidr ) return; } - // FUTURE-2017/08/02-BrettSh - also consider adding some critical or ?ALL? params to an additional trace? ETInstStationId( tsidr, m_iInstance, (BYTE)m_perfstatusEvent, m_wszInstanceName, m_wszDisplayName ); } @@ -1397,9 +1393,6 @@ __range( 0, g_ifmpMax * ( cchPerfmonInstanceNameMax + 1 ) + 1 ) ULONG g_cchDatab BYTE* g_rgbDatabaseAggregationIDs = NULL; -// ISSUE-2009/10/21-BrettSh - This used to be static, but needed it in osu.cxx. 
Would -// love to get this static again, or have a cleaner init/term story for perfmon and -// these instance names. INT g_cInstances = 0; INT g_cDatabases = 0; @@ -1776,10 +1769,6 @@ void CIsamSequenceDiagLog::Trigger( _In_ const BYTE seqTrigger ) // the time adjustments - it is sort of a little ambigious, does the waits we accumulated // at m_cseqMac - 1 belong there, or at the new seqTrigger array element? So we'll assert // if jumping by more than one sequence value, that there are no time adjustments lingering. - // ISSUE-2016/12/08-BrettSh - On final review realized this is not quite complete, we may - // have skipped two or more steps ... so we should walk back checking all untriggered steps - // for this, until we hit a triggered step (which would be allowed to have callbacks, throttles - // busy waits, etc). Assert( FTriggeredStep( m_cseqMac - 1 ) || m_rgDiagInfo[ m_cseqMac - 1 ].cCallbacks == 0 ); Assert( FTriggeredStep( m_cseqMac - 1 ) || m_rgDiagInfo[ m_cseqMac - 1 ].cThrottled == 0 ); } @@ -1814,9 +1803,6 @@ void CIsamSequenceDiagLog::AddCallbackTime( const double secsCallback, const __i return; // see comments in Trigger() about this. } - // FUTURE-2016/10/30-BrettSh - Since engaging the floating point can burn alot of CPU, I really should - // have just accumulated DHRTs and then converted it to seconds at the very end for the sprintf / stats - // accumulation. Applies to AddThrottleTime() as well. m_rgDiagInfo[m_cseqMac].cCallbacks += cCallbacks; m_rgDiagInfo[m_cseqMac].secInCallback += secsCallback; } @@ -2224,11 +2210,6 @@ void CIsamSequenceDiagLog::SprintTimings( _Out_writes_bytes_(cbTimeSeq) WCHAR * dckbPagefileUsagePeak || dckbPrivateUsage ) ) { - // FUTURE-2014/12/13-BrettSh - Jeez, I wish we had the kind of engine where I wasn't measuring - // in KB! But for now, we do, and also since we don't have heap bytes (which would be more useful - // in real bytes), all of these deltas are actually multiples of KB. 
- // FUTURE-2016/02/06-BrettSh - We could easily add some other interesting stats like reserved - // memory, differentiating mapped binary image (b/c DLL is shared), unique reference set, etc. OSStrCbFormatW( pwszCurr, cbCurrLeft, L" +M(C:%I64dK, Fs:%d, WS:%IdK # %IdK, PF:%IdK # %IdK, P:%I64dK)", dckbCacheMem, m_rgDiagInfo[seq].memstat.cPageFaultCount - m_rgDiagInfo[seqBefore].memstat.cPageFaultCount, @@ -2535,9 +2516,6 @@ VOID INST::SaveDBMSParams( DBMS_PARAM *pdbms_param ) VOID INST::RestoreDBMSParams( DBMS_PARAM *pdbms_param ) { - // FUTURE-2010/05/20-BrettSh - So this is a bit dicey, but it is probably ok, because - // we either tear down the instance right after restore, or in the case we use it, we - // probably have to have the right params anyway. m_plog->SetCSecLGFile( pdbms_param->le_lcsecLGFile ); } @@ -2783,10 +2761,6 @@ PM_ICF_PROC LTableClassNamesICFLPwszPpb; // Max characters in any table class name's suffix that we add automatically. These // strings are defined below. #define cchTCESuffixMax (10) -// ISSUE-2006/01/21-BrettSh - I think this is a localization issue? The normal perf counters -// names as picked up from esentprf.ini are in fact localized in Vista. So does this work? -// not quite sure, should be investigated. Note simply making these Unicode doesn't work, -// someone needs to investigate how to pull the strings from a localizable context. const WCHAR * const g_wszUnknown = L"_Unknown"; const WCHAR * const g_wszCatalog = L"_Catalog"; const WCHAR * const g_wszShadowCatalog = L"_ShadowCatalog"; @@ -3048,8 +3022,6 @@ INLINE VOID RUNINSTSetModeMultiInst() // we're in, while JET_paramMaxInstances // only keeps track of the max instances in multi- // instance mode) - // FUTURE-2013/10/22-BrettSh - This is too early and far away from where we're - // allocating g_rgpinst. 
g_cpinstMax = (ULONG)UlParam( JET_paramMaxInstances ); g_ifmpMax = g_cpinstMax * dbidMax + cfmpReserved; } @@ -3076,16 +3048,6 @@ LOCAL ERR ErrRUNINSTCheckAndSetOneInstMode() if ( RUNINSTGetMode() == runInstModeNoSet ) { Assert( g_cpinstInit == 0 ); - // FUTURE-2013/10/17-BrettSh - This is delicate code ... you see restore will - // allocate the instance (for single inst mode), BUT it's dangerous to leave - // ourselves in this state ... b/c if we fail out anywhere between here and - // restore getting into the beginning of ErrNewInst(), we leave ESE in this - // stuck state, where it thinks it is in one-inst mode, but there is no actual - // instance initialized. You can't JetTerm() the non-existing implicit instance - // to restore yourself to the no-mode state. It might be better to actually - // move all this code into ErrINSTSystemInit() but JetEnableMultiInstance() - // actually utilizes this separated state to force us to one way go into multi- - // inst mode. RUNINSTSetModeOneInst(); } else if ( RUNINSTGetMode() == runInstModeMultiInst ) @@ -3107,7 +3069,7 @@ LOCAL ERR ErrRUNINSTCheckOneInstMode() if ( RUNINSTGetMode() == runInstModeNoSet ) { Assert( g_cpinstInit == 0 ); - // BrettSh here: I checked and I could only find 4 instances (all variants of + // SOMEONE here: I checked and I could only find 4 instances (all variants of // JetRestore) of this called where it would expect / be OK with no mode being // set. So the callers expect no mode to be a failure (which they will fail // with once they call ErrFindPinst()). @@ -3207,23 +3169,12 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration DWORD cioT = 0; switch( rand() % 5 ) { - // FUTURE-2013/08/06-BrettSh - Actually we should try 1 and 2 and 3 and other low numbers - // just to search for bugs ... 
- //case 0: cioT = 1; break; - //case 0: cioT = 2; break; - //case 0: cioT = 3; break; - //case 0: cioT = 12; break; - //case 1: cioT = 36; break; - //case 2: cioT = 108; break; case 0: cioT = 324; break; case 1: cioT = 1024; break; case 2: cioT = 3072; break; case 3: cioT = 10000; break; case 4: cioT = 32764; break; } - // FUTURE-2013/08/26-BrettSh - Really should have an OSTrace() to indicate all interesting - // settings overrides. - //wprintf( L"\t\tDefaulted JET_paramOutstandingIOMax = %d\n", cioT ); m_cioOutstandingMax = min( (ULONG)UlParam( m_pinst, JET_paramOutstandingIOMax ), cioT ); } #endif // DEBUG @@ -3277,7 +3228,7 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration // initialize this setting if ( m_permillageSmoothIo == dwMax ) { - // Exs: 999‰ = 99.9% Smooth, 990‰ = 99.0% Smooth, 900‰ = 90.0% Smooth. Debug default = 0.2% + // Exs: 999‰ = 99.9% Smooth, 990‰ = 99.0% Smooth, 900‰ = 90.0% Smooth. Debug default = 0.2% ULONG permillageSmoothIo = OnDebugOrRetail( 2, CDefaultFileSystemConfiguration::PermillageSmoothIo() ); if ( m_pinst ) @@ -3639,10 +3590,6 @@ ERR ErrNewInst( // initialize the system if we are creating the first instance // - // FUTURE-2013/10/21-BrettSh - I would like to see this get moved out of ErrNewInst() at - // some point as some of the global state setting is done with the runInstModeMultiInst, - // runInstModeNoSet, runInstOneInst outside / before this, and some is done in here where - // we seem to be predominantly caring about specific inst stuff. if ( 0 == g_cpinstInit ) { // OSUInit's done my misc APIs may not have been done with the correct global params, @@ -3661,11 +3608,6 @@ ERR ErrNewInst( // See if g_rgpinst still have space to hold the pinst. 
// - // FUTURE-2013/10/21-BrettSh - I would like to see this get moved out of ErrNewInst() at - // some point as this seems to be g_rgpinst list management being done here, where as much - // what else is done in here seems to be predominantly caring about specific inst stuff. - // BTW, we should move this bounds check, and the ipinst loop check for a conflicting name, - // and the loop trying to find an empty slot. if ( g_cpinstInit >= g_cpinstMax ) { Error( ErrERRCheck( JET_errTooManyInstances ) ); @@ -3783,7 +3725,6 @@ ERR ErrNewInst( // but if we're creating a _new_ DB, and we happen to be in the downgrade window the new DB would use engine // default (i.e. upgradeed) and then if downgrade happens, we couldn't attach it, so we actually sort of need // to set this back to an old / safe version. - // See also FUTURE-2018/07/25-BrettSh comment. pinst->m_rgparam[ JET_paramEngineFormatVersion ].Reset( pinst, JET_efvExchange2016Cu1Rtm | JET_efvAllowHigherPersistedFormat ); } } @@ -3973,14 +3914,10 @@ LOCAL ERR ErrFindPinst( JET_INSTANCE jinst, INST **ppinst, INT *pipinst = NULL ) case runInstModeOneInst: // find the only one instance, ignore the given instance // since the given one may be bogus - // FUTURE-2013/10/16-BrettSh - Why would it not always be - // slot [0]? This applies to other APIs as well, such as - // JetGet|SetSystemParameter(). for ( ipinst = 0; ipinst < g_cpinstMax; ipinst++ ) { if ( pinstNil != g_rgpinst[ ipinst ] ) { - // testing the FUTURE-2013/10/16-BrettSh comment. Assert( ipinst == 0 ); *ppinst = g_rgpinst[ ipinst ]; if ( pipinst ) @@ -4094,7 +4031,7 @@ class APICALL { protected: ERR m_err; - INT m_op; // 2014/11/03-UmairA - To make the change easier, we cache the op so that we don't have to modify every call site. + INT m_op; // 2014/11/03-SOMEONE - To make the change easier, we cache the op so that we don't have to modify every call site. 
INT m_opOuter; // This is the saved op of the "outer" JET API when we come into another JET API from a JET callback. IOREASONTERTIARY m_iortOuter; @@ -4643,13 +4580,6 @@ ERR INST::ErrAPIEnterForInit() ERR INST::ErrAPIEnterWithoutInit( const BOOL fAllowInitInProgress ) { ERR err; - // NTRAID#ESE-115-2013/06/11-BrettSh - This has a fundamental timing hole ... while - // it will work while the INST is in the _MIDDLE_ of init(w/ err) or term, near - // the end it is functionally broken b/c we're using a member variable of the - // INST * that is to be deallocated on Term or on a failure during Init. We can - // probably solve this by moving this m_cSessionInJetAPI into the g_rgpinst next - // to the INST *, and then give out the index to that array slot instead of the - // INST * we do today. LONG lOld = AtomicExchangeAdd( &m_cSessionInJetAPI, 1 ); if ( ( lOld & maskAPILocked ) && // API can't be locked, unless ... @@ -4770,9 +4700,6 @@ VOID INST::EnterCritInst() { g_critInst.Enter(); } VOID INST::LeaveCritInst() { // We should have consistent g_runInstMode and g_cpinstInit parameters at this point - // FUTURE-2007/11/05-BrettSh - I am dubious that it is a good idea to allow - // us to be in no mode and one mode w/ g_cpinstInit 0 or 1. Ideally, we'd make - // the state transition to onemode at the same time as allocating an inst ... Assert( ( runInstModeNoSet == g_runInstMode && 2 > g_cpinstInit ) || ( runInstModeOneInst == g_runInstMode && 2 > g_cpinstInit ) || ( runInstModeMultiInst == g_runInstMode ) ); @@ -4823,24 +4750,6 @@ ERR INST::ErrINSTSystemInit() Assert( 0 == g_cpinstInit ); Assert( g_rgpinst == NULL ); // or we'll be leaking memory. - // FUTURE-2013/10/22-BrettSh - I've flown too high, and got burned by trying - // to move this code from the RUNINSTSetModeOneInst()/RUNINSTSetModeMultiInst() - // area ... this runs afoul of some perfmon re-allocation protection in - // ErrOSUInit(). What should happen? See RUNINSTSetModeMultiInst(). 
- //switch( RUNINSTGetMode() ) - // { - //case runInstModeOneInst: - // g_cpinstMax = 1; - // g_ifmpMax = g_cpinstMax * dbidMax + cfmpReserved; - // break; - //case runInstModeMultiInst: - // g_cpinstMax = (ULONG)UlParam( JET_paramMaxInstances ); - // g_ifmpMax = g_cpinstMax * dbidMax + cfmpReserved; - // break; - //default: - // // No appropriate mode set before ErrIsamSystemInit()/ErrINSTSystemInit()! - // EnforceSz( fFalse, "InvalidInstMode" ); - // } Alloc( g_rgpinst = new INST*[g_cpinstMax] ); memset( g_rgpinst, 0, sizeof(INST*) * g_cpinstMax ); @@ -5769,7 +5678,7 @@ ERR CJetParam::GetString( // UNICODE_COMPATIBILITY: // This is tricky, b/c JET used to just truncate the value, not return error if not enough buffer. - // However, I (Brett) think that we should change the contract, because anyone who is + // However, I (SOMEONE) think that we should change the contract, because anyone who is // getting a truncated string, is proably unknowingly failing in some logical way. // err = ErrOSStrCbCopyW( wszParam, cbParamMax, (WCHAR*)pjetparam->m_valueCurrent ); @@ -6161,7 +6070,7 @@ ERR CJetParam::IllegalClone( CJetParam* const pjetparamSrc, // 32kb pages: 8,150 // 16kb pages: 4,050 // -// 2017/11/15-UmairA - With prefix-compression on LID64, current chunk size still allows us to store 4 chunks + LVROOT on 1 page. +// 2017/11/15-SOMEONE - With prefix-compression on LID64, current chunk size still allows us to store 4 chunks + LVROOT on 1 page. // We can avoid changing the chunk size and still get optimal storage characteristics. // // (The chunk size on 8kb and 4kb pages stays the same to preserve @@ -6270,10 +6179,6 @@ GetCommitDefault( const CJetParam* const pjetparam, if ( pinst != pinstNil ) { - // FUTURE-2011/08/15-BrettSh - It is interesting and probably a poor choice that - // you have to specify both a pinst and a ppib because a ppib necessarily implies - // a specific pinst. Consider moving this if ( ppib ) out of the if ( pinst ) - // clause above. 
if ( ppib != ppibNil ) { ULONG ulActual; @@ -6287,8 +6192,6 @@ GetCommitDefault( const CJetParam* const pjetparam, } else { - // FUTURE-2011/08/31-BrettSh - Consider removing this and making it set the grbit - // in the actual INST's param table instead. *((JET_GRBIT*)pulParam) = pinst->m_grbitCommitDefault; } } @@ -6441,8 +6344,6 @@ SetCheckpointDepthMax( CJetParam* const pjetparam, ERR err = JET_errSuccess; Assert( pjetparam->m_paramid == JET_paramCheckpointDepthMax ); - // ISSUE-2012/04/11-BrettSh - Stupidly we can't set this b/c interop just passes an int arg to both params! - //Expected( wszParam == NULL ); Expected( ppib == ppibNil ); Call( CJetParam::SetInteger( pjetparam, pinst, ppib, ulParam, wszParam ) ); @@ -6912,7 +6813,7 @@ struct ConfigSetOverrideValue #define CO( paramid, value, flags ) { value, paramid, flags } -// It is important (cough, Anil, cough, Jonathan, cough Ian) that we do not give into our desire to +// It is important (cough, SOMEONE, cough, SOMEONE, cough SOMEONE) that we do not give into our desire to // easily control our clients params with our binary and define only configuration sets that can be // abstracted to a logically sensible configuration for our engine that is not a layering violation. @@ -6928,8 +6829,6 @@ const ConfigSetOverrideValue g_rgJetConfigRemoveQuotas[] = { // parameters. Ignoring the fact that many of these arguments did FORMERLY increase memory usage. }; -// FUTURE-2012/03/13-BrettSh - Some others that we might want to consider tuning ... esp. for the -// fact that ManagedStore runs with B Tree Defrag and DbScan ... though maybe they'll turn these off. 
/* NORMAL_PARAM( JET_paramDefragmentSequentialBTrees, CJetParam::typeBoolean, 0, 0, 0, 0, 0, -1, 1 ), @@ -6960,9 +6859,6 @@ const ConfigSetOverrideValue g_rgJetConfigRemoveQuotas[] = { const ConfigSetOverrideValue g_rgJetConfigLowMemory[] = { //CO( JET_paramEnableAdvanced, fFalse, 0x0 ), // pre-win8 we used to disable this on smallConfig, but it turned out to be a terrible idea - // FUTURE-2012/03/12-BrettSh - future site for breaking out the small config settings into their - // own table. These variables should be checked for consistency with those a 2nd time when that - // is done. // Global Component Control // @@ -7071,9 +6967,6 @@ const ConfigSetOverrideValue g_rgJetConfigSSDProfileIO[] = { //CO( JET_paramMaxCoalesceReadGapSize, 256*1024, 0x0 ), // we actually want read gapping (according to NT perf team) ... though in theory we should NOT need it with SSDs. Leaving it at default 256 KB, so we don't get carried away. Have not verified ourselves. CO( JET_paramMaxCoalesceWriteGapSize, 0, 0x0 ), // avoid overwriting pieces of the disk that do not need to be updated CO( JET_paramOutstandingIOMax, 16, 0x0 ), // do not need to be as aggressive for read IO max - // FUTURE-2012/03/13-BrettSh - Consider bringing this parameter back to hobble the write IO aggressiveness asymetrically - // compared to the read IO aggressiveness / JET_paramOutstandingIOMax. - //CO( JET_paramCheckpointIOMax, 1, 0x0 ), // not used right now CO( JET_paramPrereadIOMax, 5, 0x0 ), // should not need as much prereading for SSDs }; @@ -7086,15 +6979,6 @@ const ConfigSetOverrideValue g_rgJetConfigRunSilent[] = { // disables ETW tracing - which looks especially expensive now. }; -// FUTURE-2012/03/15-BrettSh - List of ideas for other configurations ... -// - Like LowMemory and MediumMemory, there is probably room for RunSilent differentiation above, -// like perhaps a RunQuiet, and RunVerbose or RunChatty or RunNoisy. 
Martin thinks RunStealth, -// RunSneak, and RunCloaked are good, and I'll add to that RunNinja is a good one! Honestly, -// it's like little boys are writing this code! -// - Another one might be like JET_configBenchmark or JET_configTopPerf or something that takes -// all the stops out and runs us in the fastest / heaviest weight mode possible. -// - Any others? -// #undef CO @@ -7184,9 +7068,6 @@ VOID SetJetConfigSet( INST * const pinst, _In_reads_(cConfigOverrides) const Con ulpFinal *= 2; } - // ISSUE-2012/03/17-BrettSh - We actually know the difference between when a client has - // set a param or not ... we could make the config sets skip parameters that the client has - // already set before setting the configuration parameter. // if pinst == NULL, then we're resetting parameters globally (not on a per-instance // basis), so we can reset this parameter now. @@ -7339,10 +7220,6 @@ SetConfiguration( CJetParam* const pjetparam, // if ( !( pinst != pinstNil && pjetparamT->FGlobal() ) ) { - // ISSUE-2012/03/12-BrettSh - Here (and the other 2 calls to Reset()) all - // can fail due to calling this at the wrong time, so the API has kind of - // a sucky contract in that some settings just may not be overriden with - // no feedback to the consumer. (void)pjetparamT->Reset( pinst, pjetparamT->m_valueDefault[ configLegacy & JET_configDefault ] ); } @@ -7395,10 +7272,6 @@ SetConfiguration( CJetParam* const pjetparam, if ( pinst == pinstNil || !pjetparamT->FGlobal() ) { - // FUTURE-2012/03/13-BrettSh - As soon as I can write a test to validate that I won't regress this, we'll - // take the riskier approach to move to the g_rgJetConfigLowMemory, g_rgJetConfigRemoveQuotas[] entries - // for configLegacySmall and remove the checks for configLegacy != configLegacySmall above and the extra - // array slot in the sys param definition. 
if ( pjetparamT->m_valueDefault[ configLegacySmall ] != pjetparamT->m_valueDefault[ configLegacyLegacy ] ) { (void)pjetparamT->Reset( pinst, pjetparamT->m_valueDefault[configLegacySmall] ); @@ -7415,7 +7288,6 @@ SetConfiguration( CJetParam* const pjetparam, } if ( configSet & JET_configUnthrottledMemory ) { - // FUTURE-2013/05/01-BrettSh - Fix JET_configDynamicHighMemory to do something. configSet &= ~JET_configUnthrottledMemory; } if ( configSet & JET_configSSDProfileIO ) @@ -7426,7 +7298,6 @@ SetConfiguration( CJetParam* const pjetparam, // after memory configs b/c relies on paramCacheSizeMax and most likely battery is the top-most concern if ( configSet & JET_configLowPower ) { - // FUTURE-2012/03/12-BrettSh - There are lots of things to consider, log file size, log buffers, etc. configSet &= ~JET_configLowPower; } // since these are specifically pools that don't affect memory size, it's fine to have near end @@ -7443,7 +7314,6 @@ SetConfiguration( CJetParam* const pjetparam, } if ( configSet & JET_configHighConcurrencyScaling ) { - // FUTURE-2013/05/01-BrettSh - Fix JET_configHighConcurrencyScaling to do something. configSet &= ~JET_configHighConcurrencyScaling; } @@ -7454,9 +7324,6 @@ SetConfiguration( CJetParam* const pjetparam, return ErrERRCheck( JET_errInvalidParameter ); } - // FUTURE-2012/03/13-BrettSh - It is worth noting that if we ever set any paths that get recalculated - // in here via FixDefaultSystemParameters(), we'll need to add another call to FixDefaultSystemParameters - // here. return err; } @@ -7685,28 +7552,7 @@ ErrSetCacheClosedTables( CJetParam* const pjetparam, value2 ) \ NORMAL_PARAMEX( #paramid, paramid, type, fAdvanced, fGlobal, fMayNotWriteAfterGlobalInit, fMayNotWriteAfterInstanceInit, rangeLow, rangeHigh, value, value2 ) -// ISSUE-2012/02/02-martinc: g_rgparamRaw *should* be on a read-only data page. At the -// time of this writing, it is for x86, but not amd64 or ARM. The latter platforms contain -// a dynamic initializer. 
Because we need to pay the COW penalty for most of our platforms -// regardless of whether it's 'const' or not, it is simpler to pay the COW penalty on -// all platforms for simpler code. -// -// If the g_rgparamRaw array can be made 'const' and eliminate the dynamic initializer, then -// we will want to keep it on a readonly page and copy it to some heap memory to avoid the -// COW operations. -// -// The following are known factors causing a dynamic initializer: -// -Presence of a destructor (~JetParam). -// -Initialization involving { a ? b : c }. (e.g. PFNGET_OF_TYPE). The compiler does not -// evaluate these ternary expressions at compile time. -// -// Even after eliminating these two factors, the dynamic initializer was still instantiated. -// ISSUE-2012/02/13-martinc. There is a bug in the Windows 8 OACR that causes the cl.exe wrapper -// to run away consuming gigabytes of memory, and thus causing a DoS on the build machine. -// The bug has been fixed in another branch, but may take a while to make it to our branch. -// So this #ifdef _PREFAST_ workaround does not have to be in here for long. -// Windows 8 Bugs:694176. // UNICODE_UNDONE_DEFERRED: Technically don't need to do this, but I noticed that if you set a default string value to ASCII string, there is no compile error, that means it is ripe for a bug. @@ -7788,12 +7634,8 @@ const size_t g_cparam = _countof( g_rgparamRaw ); C_ASSERT( sizeof( JetParam ) == sizeof( CJetParam ) ); // g_grparamRaw is an array of JetParam, and g_rgparam is an array of the child class CJetParam. -// FUTURE-2012/02/06-martinc. g_gparam may one day be dynamically allocated -// and copied from g_rgparamRaw. CJetParam* const g_rgparam = (CJetParam*) &g_rgparamRaw[ 0 ]; -// FUTURE-2015/03/04-AndyGo - Only remaining reliance is in ErrITSetConstants calling FixDefaultSystemParameters. 
-// Otherwise, FixDefaultSystemParameters is protected by g_critInst LOCAL CCriticalSection g_critSysParamFixup( CLockBasicInfo( CSyncBasicInfo( "g_critSysParamFixup" ), rankSysParamFixup, 0 ) ); // Some System Parameters need a dynamic default that can't be established at compile time. @@ -7843,10 +7685,6 @@ VOID FixDefaultSystemParameters() // Configure Default Path Variables // - // ISSUE-2013/11/06-BrettSh - Didn't think you could create a pfsapi before ErrOSUInit() ... someone - // should investigate that. If not valid, fix. IF valid, then perhaps change all the other OSU - // users (JetGetDatabaseFileInfo, JetRemoveLogFile, etc) off the OSU so we don't have that perfmon - // allocation pre-init problems. Call( ErrOSFSCreate( g_pfsconfigGlobal, &pfsapi ) ); Call( pfsapi->ErrPathFolderDefault( rgwchDefaultPath, _countof( rgwchDefaultPath ), &fIsDefaultDirectory ) ); @@ -8098,14 +7936,6 @@ ERR ErrSysParamLoadDefaults( const BOOL fHasCritInst, INST * pinst, CConfigStore Assert( !fHasCritInst || INST::FOwnerCritInst() ); - // FUTURE-2013/11/02-BrettSh - It occurred to me a bit late, that it is possibly more - // efficient to use some form of registry enumeration API ... rather than test get from - // registry almost 200 values. Of course then you'd have to search through all the - // strings for the param table looking for a match! :P Blech. - // ISSUE-2013/11/06-BrettSh - Got a build error by declaring cparam as "const cparam" in - // the arg list above (which I didn't even know you could do, must be assuming int), and - // then not having a cast to (size_t) here ... BUT only failed build in focus, NOT on - // local box. Debug that, and fix local build to match focus build. Seriously what the? 
for ( size_t iparamid = 0; iparamid < (size_t)cparam; iparamid++ ) { Assert( prgparam[iparamid].m_paramid == iparamid ); // sanity check @@ -8314,9 +8144,9 @@ class InitCallbackWrapper }; // -// In what can only be described as a new-wave fusion of Romanian Andrei-escu-esc CAuto -// class model sprinkled with JLiem-like char/wchar agnostic _T-esk templating gloss, to -// create a not quite break the debugger Goodsellian templated auto class for converting +// In what can only be described as a new-wave fusion of Romanian SOMEONE-escu-esc CAuto +// class model sprinkled with SOMEONE-like char/wchar agnostic _T-esk templating gloss, to +// create a not quite break the debugger SOMEONEian templated auto class for converting // V1 index create structures to type V2 index create structures. // template< class JET_INDEXCREATE_T, class JET_INDEXCREATE2_T > @@ -8418,9 +8248,9 @@ CAutoINDEXCREATE1To2_T< JET_INDEXCREATE_T, JET_INDEXCREATE2_T >::~CAutoINDEXCREA //===================================================== // -// In what can only be described as a new-wave fusion of Romanian Andrei-escu-esc CAuto -// class model sprinkled with JLiem-like char/wchar agnostic _T-esk templating gloss, to -// create a not quite break the debugger Goodsellian templated auto class for converting +// In what can only be described as a new-wave fusion of Romanian SOMEONE-escu-esc CAuto +// class model sprinkled with SOMEONE-like char/wchar agnostic _T-esk templating gloss, to +// create a not quite break the debugger SOMEONEian templated auto class for converting // V2 index create structures to type V3 index create structures. // template< class JET_INDEXCREATE2_T, class JET_INDEXCREATE3_T > @@ -12472,9 +12302,6 @@ LOCAL JET_ERR JetGetVersionEx( _In_ JET_SESID sesid, _Out_ ULONG *pVersion ) // 27-31 Image Major // // - // ISSUE-2009/08/03-KetanD - Need to revise this before we get to Windows 16. 
- // ISSUE-2009/08/03-KetanD - On Windows, we are relying on the build number not resetting for - // service packs. If that changes, our version ULONG moves backwards. #ifdef ESENT // assert no aliasing (i.e. overlap) of version information @@ -12488,9 +12315,6 @@ LOCAL JET_ERR JetGetVersionEx( _In_ JET_SESID sesid, _Out_ ULONG *pVersion ) Assert( DwUtilSystemServicePackNumber() < 1 << 8 ); - // ISSUE-2009/08/03-KetanD - Note that we had not been using BuildNumberMinor or ImageVersionMinor for Windows since 9/2004. - // When we revise this API, we can try having a build number again. The lack of ImageVersionMinor - // was unintentional. Using service packs for low bits is questionable. const ULONG ulVersion = ( ( DwUtilImageVersionMajor() & 0xF ) << 28 ) + ( ( DwUtilImageVersionMinor() & 0xF ) << 24 ) + @@ -12557,11 +12381,6 @@ LOCAL JET_ERR JetGetSystemParameterEx( _In_ JET_INSTANCE instance, _In_ JET_SESID sesid, _In_ ULONG paramid, - // FUTURE-2005/06/09-BrettSh - This changing of SAL is for a SINGLE JET param that I know - // of, JET_paramErrorToString, where it uses an OUT param as basically an IN param!! Guh. - // We'd be best to put this back, and make the clients use some other function for getting - // errors. - // __out_bcount_opt(cbMax) JET_API_PTR *plParam, __out_opt JET_API_PTR * plParam, _Out_opt_z_bytecap_( cbMax ) JET_PWSTR wszParam, _In_ ULONG cbMax ) @@ -12653,10 +12472,6 @@ LOCAL JET_ERR JetGetSystemParameterExA( _In_ JET_INSTANCE instance, _In_ JET_SESID sesid, _In_ ULONG paramid, - // FUTURE-2005/06/09-BrettSh - This changing of SAL is for a SINGLE JET param that I know - // of, JET_paramErrorToString, where it uses an OUT param as basically an IN param!! - // We'd be best to put this back, and make the clients use some other function for getting - // errors. 
__out_opt JET_API_PTR * plParam, _Out_opt_z_bytecap_( cbMax ) JET_PSTR szParam, _In_ ULONG cbMax ) @@ -14144,13 +13959,6 @@ LOCAL JET_ERR JetGetPageInfoEx( || pgnoNull == pgno || pgnoMax == pgno ) { - // FUTURE-2006/08/23-JLiem - AndyGo suggested - // setting a flag in JET_PAGEINFO to indicate - // that this page is being interpreted as a - // trailer page (so the caller can verify - // that this is in fact the last physical - // page in the database) - // pagetype = databaseHeader; pgno = pgnoNull; } @@ -15297,9 +15105,6 @@ C_ASSERT( sizeof(JET_TABLECREATE2_W) != sizeof(JET_TABLECREATE3_A) ); LOCAL JET_ERR JetCreateTableColumnIndexEx( _In_ JET_SESID sesid, _In_ JET_DBID dbid, - // NTRAID#ESE-125-2014/06/18-BrettSh - Are you implementing JET_TABLECREATE6_A !?!?! Then please please - // please please split out the in and out args per the ESE bug cited. - // ISSUE ISSUE / FUTURE FUTURE - NOTICE ME. __inout JET_TABLECREATE5_A * ptablecreate ) { JET_ERR err; @@ -15332,13 +15137,6 @@ LOCAL JET_ERR JetCreateTableColumnIndexEx( } else { - // FUTURE-2014/07/09-martinc;BrettSh - This is a particularly inefficient method of creating - // a table because of the forced deep copy. The problems preventing passing the original - // structure with JET_bitTableCreateImmutableStructure are: - // - The list of columnids are updated in rgcolumns, that are consumed later in ErrFILEICreateIndexes (columnidT = pcolcreate->columnid;). - // - (Solvable) Closing the exclusively-opened tableid. - // - (Solvable) Not writing the index id's. - // - Unknown further issues beyond ... ;-) CAutoTABLECREATE5To5_T< JET_TABLECREATE5_A, JET_TABLECREATE5_A, CAutoINDEXCREATE3To3_T< JET_INDEXCREATE3_A, JET_INDEXCREATE3_A > > tablecreate; Call( tablecreate.ErrSet( ptablecreate ) ); @@ -17316,9 +17114,6 @@ LOCAL JET_ERR JetBackupInstanceEx( if ( apicall.FEnter( instance ) ) { - // ISSUE-2009/12/05-BrettSh - I don't really like this, because the backup callback - // doesn't have to match the Init callback. 
The backup even has a JET_SESID that we - // could pass to the backup callback. InitCallbackWrapper initCallbackWrapper(pfnStatus); apicall.LeaveAfterCall( apicall.Pinst()->m_fBackupAllowed ? ErrIsamBackup( (JET_INSTANCE)apicall.Pinst(), wszBackupPath, grbit, InitCallbackWrapper::PfnWrapper, &initCallbackWrapper ) : @@ -17396,9 +17191,6 @@ JET_ERR JET_API JetRestoreA( _In_ JET_PCSTR szSource, __in_opt JET_PFNSTATUS Assert( fInitd == ( g_rgpinst != NULL ) ); - // ISSUE-2013/10/15-BrettSh - I'm not convinced this is safe, we definitely could have someone - // term at the same time, and a bad race and AV here. I don't think we should spin too many - // cycles on a concurrent race condition, that could cause other AVs as well. err = JetRestoreInstanceA( g_rgpinst ? (JET_INSTANCE)g_rgpinst[0] : NULL, szSource, NULL, pfn ); // I am not sure this holds ... @@ -20159,7 +19951,7 @@ LOCAL JET_ERR JetDBUtilitiesEx( JET_DBUTIL_W *pdbutilW ) // With the introduction of restartable seeds, some long-held assumptions and checks about the backup set and its // required range may not hold anymore. Therefore, this JET_bitDBUtilOptionSkipMinLogChecksUpdateHeader option was // created to handle that case. - // I (ADaCosta) think we shouldn't even have the bit and just always pass fTrue to the function below to signal + // I (SOMEONE) think we shouldn't even have the bit and just always pass fTrue to the function below to signal // that the backup set is being handled externally, regardless of whether or not it was restarted. Perhaps we // should do that in the future and deprecate exposing the bit in the first place. // @@ -21282,19 +21074,10 @@ ERR ErrTermComplete( JET_INSTANCE instance, JET_GRBIT grbit ) { pinst->m_pbackup->BKLockBackup(); - // FUTURE-2006/03/11-BrettSh - may be concurrency hole, b/c logutil.cxx doesn't check - // m_fBackupAllowed inside this crit section? Check thoroughly ... 
pinst->m_pbackup->BKUnlockBackup(); // Lazy way to wait until the snapshot terminates / aborts and comes back to us, letting // the term thread go on normally. - // FUTURE-2006/06/10-BrettSh - We should signal the freeze-thaw thread that we can - // abort and cleanup the snapshow backup immediately. For some reaon though this - // all finishes fairly quickly so it seems not needed? Not sure I understand why. - // In theory it shouldn't be that hard to just call snapshot abort if this is taking - // too long for anyone? - // - // Exchange12 138101: Yield to VSS writer thread to avoid deadlock while( pinst->m_pOSSnapshotSession != NULL ) { CESESnapshotSession::SnapshotCritLeave(); @@ -21529,9 +21312,6 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( WCHAR wszParamName[100]; OSStrCbFormatW( wszParamName, sizeof(wszParamName), L"%hs", g_rgparam[iparamid].m_szParamName ); const WCHAR * rgwszT[] = { wszParamName }; - // FUTURE-2013/11/20-BrettSh - It would be cool if we could generate a string for the identity - // of the _other_ instance that is _most likely_ currently initialized so that we could identify - // the conflicting service. UtilReportEvent( eventWarning, GENERAL_CATEGORY, GLOBAL_SYSTEM_PARAMETER_NOT_SET_PREVIOUSLY_MISMATCH_ID, @@ -21544,13 +21324,6 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( IBitmapAPI::ERR errBitmap = fbm.ErrSet( iparamid, fTrue ); Assert( errBitmap == IBitmapAPI::ERR::errSuccess ); - // ISSUE-2013/11/15-BrettSh comment we just have to load reg defaults - // here but in a special way that says we're only interested in the set property ... and to - // check the equality below ... I think. Maybe we can just load this specific param and see if - // the value matches? Think about how this integrates w/ people setting paramConfigStoreSpec - // itself! Ugh ... that seems like it would break, but I think it works? Find the test. OHHH, - // I see we won't set it the 2nd time, so if the reg values have changed, we'll ignore it. 
Log - // an event or fail out or both if they don't match? switch( g_rgparam[iparamid].Type_() ) { @@ -21587,9 +21360,6 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( WCHAR wszParamName[100]; OSStrCbFormatW( wszParamName, sizeof(wszParamName), L"%hs", g_rgparam[iparamid].m_szParamName ); const WCHAR * rgwszT[] = { wszParamName }; - // FUTURE-2013/11/20-BrettSh - It would be cool if we could generate a string for the identity - // of the _other_ instance that is _most likely_ currently initialized so that we could identify - // the conflicting service. UtilReportEvent( eventWarning, GENERAL_CATEGORY, GLOBAL_SYSTEM_PARAMETER_MISMATCH_ID, @@ -21619,9 +21389,6 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( // Since the registry defaults _appear written_, it can seem like the // two sets disagree. We will assume that if they set the same value // that they got the same set of params. This is not the safest bet. - // FUTURE-2013/11/15-BrettSh - Make this fail depending upon if the - // actual registry parameters values have changed. A sort of deep - // read of the fact. !g_rgparam[iparamid].m_fRegDefault ) { Assert( fOriginallySet ); @@ -21629,9 +21396,6 @@ LOCAL JET_ERR JetEnableMultiInstanceEx( WCHAR wszParamName[100]; OSStrCbFormatW( wszParamName, sizeof(wszParamName), L"%hs", g_rgparam[iparamid].m_szParamName ); const WCHAR * rgwszT[] = { wszParamName }; - // FUTURE-2013/11/20-BrettSh - It would be cool if we could generate a string for the identity - // of the _other_ instance that is _most likely_ currently initialized so that we could identify - // the conflicting service. 
UtilReportEvent( eventWarning, GENERAL_CATEGORY, GLOBAL_SYSTEM_PARAMETER_SET_PREVIOUSLY_MISMATCH_ID, @@ -22703,9 +22467,6 @@ JET_ERR JET_API JetTerm2( _In_ JET_INSTANCE instance, _In_ JET_GRBIT grbit ) JET_TRY( opTerm, JetTermEx( instance, grbit ) ); } -// FUTURE-2012/04/10-BrettSh - The new version came in too late to risk the existing API (says -// Alex) so we'll make the v1 JET API call this and v2 will call the new JetStopServiceInstanceEx() -// API. In Win9 we can move the v1 to the v2 API with the JET_bitStopServiceAll. JET_ERR JET_API JetStopServiceInstanceExOld( _In_ JET_INSTANCE instance ) { ERR err; @@ -22713,8 +22474,6 @@ JET_ERR JET_API JetStopServiceInstanceExOld( _In_ JET_INSTANCE instance ) OSTrace( JET_tracetagAPI, OSFormat( "Start %s(0x%Ix)", __FUNCTION__, instance ) ); - // ISSUE-2013/10/16-BrettSh - We didn't do APICALL_INST::FEnter(), nor are we in - // INST::FOwnerCritInst() ... so I don't think this is actually safe. CallR( ErrFindPinst( instance, &pinst ) ); // Halt OLD for this instance @@ -22755,8 +22514,6 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G // Validate and retrieve args // - // ISSUE-2013/10/16-BrettSh - We didn't do APICALL_INST::FEnter(), nor are we in - // INST::FOwnerCritInst() ... so I don't think this is actually safe. CallR( ErrFindPinst( instance, &pinst ) ); const JET_GRBIT bitStopServiceAllInternal = 0x1; @@ -22822,8 +22579,6 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G pinst->m_fCheckpointQuiesce = fFalse; - // FUTURE-2012/04/21-BrettSh - Should we reject this if ( 0 == ( pinst->m_grbitStopped & JET_bitStopServiceQuiesceCaches ) ) ... - // it means they asked for a service to be resumed that wasn't stopped in the first place? Well the contract is fulfilled. // We don't need to call ErrIOUpdateCheckpoints() like we do for quiesce, because it // wouldn't do anything ... 
so we'll let the user drive the checkpoint back up from @@ -22912,8 +22667,6 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G { // Halt OLD for this instance - // FUTURE-2012/04/10-BrettSh - These should all in the fullness of time move under - // the JET_bitStopServiceBackgroundUserTasks bit, once they are all restartable. DBMScanStopAllScansForInst( pinst ); OLDTermInst( pinst ); OLD2TermInst( pinst ); @@ -22924,18 +22677,10 @@ JET_ERR JET_API JetStopServiceInstanceEx( _In_ JET_INSTANCE instance, _In_ JET_G //OnDebug( grbitCheck &= ~bitStopServiceAllInternal ); } - // FUTURE-2012/04/10-BrettSh - This method is essentially Jetterm broken up piece-meal, - // and I think I would like to see JetTerm() implemented as each piece of these done - // independently. - // FUTURE-2012/04/10-BrettSh - It would be good to implement JetStopBackupInstanceEx as - // another grbit here. if ( grbit & JET_bitStopServiceBackgroundUserTasks ) { // Halt OLDv2/B+ Tree defrag for this instance - // ISSUE-2012/04/18-BrettSh - It would be better to suspend in-progress B+ tree defrags - // as well, BUT the way Exchange will use this (as it will be suspended _most_ of the - // time until the maintenance window), then we're unlikely to need this. FMP::EnterFMPPoolAsWriter(); FMP * pfmpCurr = NULL; if ( pinst && pinst->m_fJetInitialized ) @@ -23069,8 +22814,6 @@ LOCAL JET_ERR JetStopBackupInstanceEx( _In_ JET_INSTANCE instance ) OSTrace( JET_tracetagAPI, OSFormat( "Start %s(0x%Ix)", __FUNCTION__, instance ) ); - // ISSUE-2013/10/16-BrettSh - We didn't do APICALL_INST::FEnter(), nor are we in - // INST::FOwnerCritInst() ... so I don't think this is actually safe. 
CallR( ErrFindPinst( instance, &pinst ) ); if ( pinst->m_plog ) @@ -23805,9 +23548,6 @@ JET_ERR ErrTESTHOOKAlterDatabaseFileHeader( const JET_TESTHOOKALTERDBFILEHDR * c Call( ErrUtilReadShadowedHeader( pinstNil, pfsapi, pfapiDatabase, JET_filetypeDatabase, (BYTE*)pdbfilehdr, (DWORD)g_cbPageMax, (LONG)OffsetOf( DBFILEHDR_FIX, le_cbPageSize ), urhfReadOnly|urhfNoFailOnPageMismatch, &cbPageSize, &shs ) ); Call( CFlushMapForUnattachedDb::ErrGetPersistedFlushMapOrNullObjectIfRuntime( palterdbfilehdr->szDatabase, pdbfilehdr, pinstNil, &pfm ) ); - // FUTURE-2016/03/16-BrettSh - Be better if we kept the flush map up to date, but not critical to this - // feature, so dropping. - //Alloc( pfm ); // Must be after ErrUtilReadShadowedHeader() so we have page size. if ( ( palterdbfilehdr->ibField + palterdbfilehdr->cbField ) > cbPageSize ) @@ -23891,16 +23631,10 @@ JET_ERR JET_API JetTestHook( } break; - // FUTURE-2010/01/02-BrettSh - Try to integrate this testing method to the other and - // bring convergence to how we trigger unit tests. Probably need to move ErrOSUInit() - // above to make that happen. case opTestHookUnitTests2: { const JET_TESTHOOKUNITTEST2* const pParams = reinterpret_cast( pv ); - // ISSUE-2010/01/25-BrettSh - In one of fugliest API decisions ever, the - // JET_dbidNil (JET_DBIDs which get casted to IFMPs internally) is different - // from ifmpNil. const INT failures = JetUnitTest::RunTests( pParams->szTestName, pParams->dbidTestOn == JET_dbidNil ? 
ifmpNil : (IFMP)pParams->dbidTestOn ); if( failures > 0 ) @@ -23931,7 +23665,7 @@ JET_ERR JET_API JetTestHook( { if ( pParams->type != JET_TestInjectFault || pParams->grbit != JET_bitInjectionProbabilityPct || - pParams->ulProbability != 5 /* b/c that's what g_bflruk is using, move along Alex */ ) + pParams->ulProbability != 5 /* b/c that's what g_bflruk is using, move along SOMEONE */ ) { // Call( ErrERRCheck( JET_errInvalidParameter ) ); @@ -24079,10 +23813,6 @@ JET_ERR JET_API JetTestHook( const LGPOS lgposNewest = pinst->m_plog->LgposLGLogTipNoLock(); const __int64 cbCheckpointDepth = (__int64)pinst->m_plog->CbLGOffsetLgposForOB0( lgposNewest, lgposCheckpoint ); - // ISSUE-2012/04/18-BrettSh - I've gotten -8 out of this calculation! This is because the - // lgposTip is at beginning of the last LR pushed into the log buffer, not after it. So - // this should just be calculated as zero. We should fix this in log if possible, this is - // just silly. Assert( cbCheckpointDepth > -( 4096 * 64 * 1024 ) /* new 4k-segment-based max log file size */ ); *((__int64*)pv) = max( cbCheckpointDepth, 0 ); @@ -24125,10 +23855,6 @@ JET_ERR JET_API JetTestHook( // This is only used for our resmgrenginetest.exe, and it at worst case loads 100k buffers, at // a 20 unique touches / sec rate ... this is 5 M ticks cache lifetime ... anything beyond that // is a break of our target ... - // FUTURE-2012/09/13-BrettSh - This is assuming what resmgrenginetest needs, and so may break - // some day, or need to be moved to separate variables in JET_TESTHOOKTIMEINJECTION. For now - // I am ok with this. We also may need a parameter to force if we're going to walk over 2B or - // 4B ticks and excercise wrap. 
g_bflruk.SetTimeBar( 90 * 60 * 1000 /* lifetime = 90 min | 5,400,000 */, pthtimeinj->tickNow + 10 * 60 * 60 * 1000 /* +10 hrs */ ); } } @@ -24170,8 +23896,6 @@ JET_ERR JET_API JetTestHook( JET_mskTestHookCorruptSpecific ) & JET_bitTestHookCorruptLeaveChecksum ) ); - // FUTURE-2013/01/16-BrettSh - This may have to be restructured and split up when we allow - // the corrupting of say log files and such. if ( pcorrupt->grbit & JET_bitTestHookCorruptDatabaseFile ) { @@ -24200,7 +23924,6 @@ JET_ERR JET_API JetTestHook( if ( pcorrupt->grbit & JET_bitTestHookCorruptPageSingleFld ) { // Help the client out ... - // FUTURE-2013/01/17-BrettSh - Probably should let client explicitly set this. (void)FNegTestSet( fCorruptingPageLogically ); // Corrupt the Page, by tweaking a byte randomly in the page. @@ -24218,7 +23941,6 @@ JET_ERR JET_API JetTestHook( if ( pcorrupt->grbit & JET_bitTestHookCorruptPageRemoveNode ) { // Help the client out ... - // FUTURE-2013/01/17-BrettSh - Probably should let client explicitly set this. (void)FNegTestSet( fCorruptingPageLogically ); AssertSz( fFalse, "NYI - caused problems if there is only 1 line on the page." ); @@ -24233,7 +23955,6 @@ JET_ERR JET_API JetTestHook( Assert( pcorrupt->CorruptDatabasePageImage.iSubTarget ); // or else this would do nothing. // Help the client out ... - // FUTURE-2013/01/17-BrettSh - Probably should let client explicitly set this. 
(void)FNegTestSet( fCorruptingWithLostFlush ); diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 06ed89b1..58f74186 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -138,7 +138,13 @@ class THashedLRUKCache m_cCachingFileIO( 0 ), m_iorl( this ), m_pfnIORangeLockAcquired( NULL ), - m_keyIORangeLockAcquired( NULL ) + m_keyIORangeLockAcquired( NULL ), + m_fIORangeLockAcquired( fFalse ), + m_fIORequested( fFalse ), + m_cIOPending( 0 ), + m_pctls( NULL ), + m_pmsig( NULL ), + m_fWaitForIOCompleted( fFalse ) { m_ilRequestsByIO.InsertAsPrevMost( this ); } @@ -154,7 +160,7 @@ class THashedLRUKCache ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } typename CHashedLRUKCachedFileTableEntry::CIORangeLockBase* Piorl() { return &m_iorl; } - BOOL FIOCompleted() const { return m_msIO.FEmpty(); } + BOOL FWaitForIOCompleted() const { return !m_fIORequested || m_fWaitForIOCompleted; } BOOL FCacheMiss() const { return m_fCacheMiss; } BOOL FCacheHit() const { return m_fCacheHit; } @@ -193,14 +199,42 @@ class THashedLRUKCache } } - typedef CMeteredSection::PFNPARTITIONCOMPLETE PfnIOComplete; - - void WaitForIO( _In_opt_ CRequest::PfnIOComplete pfnIOComplete = NULL, - _In_opt_ const DWORD_PTR keyIOComplete = NULL ) + void WaitForIO( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ) { + CManualResetSignal msig( CSyncBasicInfo( "THashedLRUKCache::CRequest::WaitForIO::msig" ) ); + + Assert( !m_fWaitForIOCompleted ); + IssueIO(); - m_msIO.Partition( pfnIOComplete, keyIOComplete ); + // setup to wait for the IO to complete either sync or async + + m_pctls = pctls; + m_pmsig = pctls ? NULL : &msig; + + if ( pctls ) + { + msig.Set(); + } + + // release an IO ref count. 
whoever drops this count to -1 causes the completion + + ReleaseRefIO(); + + // if this is a sync completion then wait + + msig.Wait(); + } + + void ResetWaitForIO() + { + Assert( FWaitForIOCompleted() ); + + m_fIORequested = fFalse; + AtomicExchange( (LONG*)&m_cIOPending, 0 ); + m_fWaitForIOCompleted = fFalse; + m_pctls = NULL; + m_pmsig = NULL; } ERR ErrWriteCluster( _In_ ICachedBlockSlab* const pcbs, @@ -211,6 +245,8 @@ class THashedLRUKCache ERR err = JET_errSuccess; const CClusterWriteCompletionContext* pcwcc = PcwccGetClusterCompletionContext(); + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Cluster %s", OSFormatFileId( Pc() ), @@ -244,6 +280,8 @@ class THashedLRUKCache { ERR err = JET_errSuccess; + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Cluster %s", OSFormatFileId( Pc() ), @@ -270,6 +308,8 @@ class THashedLRUKCache ERR err = JET_errSuccess; BYTE* const pbData = (BYTE*)PbData() + ibOffset - Offsets().IbStart(); + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Block ib=%llu cb=%u", OSFormatFileId( Pc() ), @@ -292,6 +332,8 @@ class THashedLRUKCache ERR err = JET_errSuccess; const BYTE* const pbData = PbData() + ibOffset - Offsets().IbStart(); + Assert( !m_fWaitForIOCompleted ); + OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Block ib=%llu cb=%u", OSFormatFileId( Pc() ), @@ -363,27 +405,72 @@ class THashedLRUKCache ~CRequest() { - Assert( m_msIO.FEmpty() ); + Assert( FWaitForIOCompleted() ); } void Start() override { THashedLRUKCacheBase::CRequest::Start(); - if ( m_msIO.GroupActive() != 0 ) + AddRefIO(); + } + + void Finish( _In_ const ERR err ) override + { + THashedLRUKCacheBase::CRequest::Finish( err ); + + ReleaseRefIO(); + } + + void AddRefIO() + { + m_fIORequested = fTrue; + AtomicIncrement( 
(LONG*)&m_cIOPending ); + } + + void ReleaseRefIO() + { + if ( AtomicDecrement( (LONG*)&m_cIOPending ) == -1 ) { - m_msIO.Partition(); + WaitForIOComplete(); } - - const CMeteredSection::Group group = m_msIO.Enter(); - Assert( group == 0 ); } - void Finish( _In_ const ERR err ) override + void WaitForIOComplete() { - m_msIO.Leave( 0 ); + // cache the completion context + // + // NOTE: this is stable while m_cIOPending != -1 - THashedLRUKCacheBase::CRequest::Finish( err ); + CHashedLRUKCacheThreadLocalStorage* pctls = m_pctls; + CManualResetSignal* const pmsig = m_pmsig; + + // if this is an async completion then we must reference the TLS so it isn't released + + if ( pctls ) + { + pctls->AddRef(); + } + + // indicate that the IO is complete + // + // NOTE: we cannot touch the object after this point because it can be released by the + // Async IO Worker immediately + + m_fWaitForIOCompleted = fTrue; + + // do the completion notification + + if ( pctls ) + { + pctls->CueAsyncIOWorker(); + CHashedLRUKCacheThreadLocalStorage::Release( &pctls ); + } + + if ( pmsig ) + { + pmsig->Set(); + } } private: @@ -473,6 +560,8 @@ class THashedLRUKCache { m_prequest->m_pfnIORangeLockAcquired( m_prequest->m_keyIORangeLockAcquired ); } + + m_prequest->m_fIORangeLockAcquired = fTrue; } private: @@ -559,7 +648,6 @@ class THashedLRUKCache private: const CClusterWriteCompletionContext m_rgcwcc[ 2 ]; - CMeteredSection m_msIO; BOOL m_fCacheMiss; int m_cCachedFileIO; BOOL m_fCacheHit; @@ -571,6 +659,12 @@ class THashedLRUKCache CIORangeLock m_iorl; CRequest::PfnIORangeLockAcquired m_pfnIORangeLockAcquired; DWORD_PTR m_keyIORangeLockAcquired; + BOOL m_fIORangeLockAcquired; + BOOL m_fIORequested; + volatile LONG m_cIOPending; + BOOL m_fWaitForIOCompleted; + CHashedLRUKCacheThreadLocalStorage* m_pctls; + CManualResetSignal* m_pmsig; }; // Update Slab Visitor @@ -4817,6 +4911,7 @@ class THashedLRUKCache _In_ CRequest* const prequestIO ); void WaitForPendingIO( _In_ CRequest* const prequestIO 
); BOOL FCompletedIO( _In_ CRequest* const prequestIO ); + void ClearIOCompletion( _In_ CRequest* const prequestIO ); void RequestFinalizeIO( _In_ CRequest* const prequestIO ); void RequestIO( _In_ CRequest* const prequestIO, @@ -8280,168 +8375,171 @@ void THashedLRUKCache::Issue() template void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls ) { - BOOL fIORangeLockFailure = fFalse; - CRequest* prequestIONext = NULL; - pctls->BeginAsyncIOWorker(); - // for each issued IO, request an IO range lock in terms of the cached file. these IO range locks not only - // protect against chaotic concurrent IO to overlapping offsets but they also serialize all activity for that - // offset range including things like write back or moving cached blocks in the caching file - - pctls->CritAsyncIOWorkerState().Enter(); - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); - prequestIO; - prequestIO = prequestIONext ) + while ( !pctls->FTryEndAsyncIOWorker() ) { - prequestIONext = pctls->IlIOIssued().Next( prequestIO ); + BOOL fIORangeLockFailure = fFalse; + CRequest* prequestIONext = NULL; - if ( FWaitForIORangeLock( prequestIO, pctls ) ) - { - pctls->IlIOIssued().Remove( prequestIO ); - pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); - } - else - { - fIORangeLockFailure = fTrue; - } - } - pctls->CritAsyncIOWorkerState().Leave(); - - // determine which requested IO range locks have been acquired - - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); - prequestIO; - prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIORangeLockPending().Next( prequestIO ); + // for each issued IO, request an IO range lock in terms of the cached file. 
these IO range locks not only + // protect against chaotic concurrent IO to overlapping offsets but they also serialize all activity for that + // offset range including things like write back or moving cached blocks in the caching file - if ( prequestIO->Piorl()->FLocked() ) + pctls->CritAsyncIOWorkerState().Enter(); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIOIssued().PrevMost(); + prequestIO; + prequestIO = prequestIONext ) { - pctls->IlIORangeLockPending().Remove( prequestIO ); - pctls->IlIORangeLocked().InsertAsNextMost( prequestIO ); - } - } + prequestIONext = pctls->IlIOIssued().Next( prequestIO ); - // for each locked IO, request IO against the cached file and then the caching file. we do this to maximize - // our chances of IO optimization by the underlying file system implementation - // - // NOTE: RequestCachedFileIO / RequestCachingFileIO is touching slabs twice + if ( FWaitForIORangeLock( prequestIO, pctls ) ) + { + pctls->IlIOIssued().Remove( prequestIO ); + pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); + } + else + { + fIORangeLockFailure = fTrue; + } + } + pctls->CritAsyncIOWorkerState().Leave(); - while ( CRequest* prequestIO = pctls->IlIORangeLocked().PrevMost() ) - { - RequestCachedFileIO( prequestIO ); + // determine which requested IO range locks have been acquired - pctls->IlIORangeLocked().Remove( prequestIO ); - pctls->IlCachedFileIORequested().InsertAsNextMost( prequestIO ); - } + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIORangeLockPending().PrevMost(); + prequestIO; + prequestIO = prequestIONext ) + { + prequestIONext = pctls->IlIORangeLockPending().Next( prequestIO ); - while ( CRequest* prequestIO = pctls->IlCachedFileIORequested().PrevMost() ) - { - RequestCachingFileIO( prequestIO ); + if ( prequestIO->Piorl()->FLocked() ) + { + pctls->IlIORangeLockPending().Remove( prequestIO ); + pctls->IlIORangeLocked().InsertAsNextMost( prequestIO ); + } + } - 
pctls->IlCachedFileIORequested().Remove( prequestIO ); - pctls->IlCachingFileIORequested().InsertAsNextMost( prequestIO ); - } + // for each locked IO, request IO against the cached file and then the caching file. we do this to maximize + // our chances of IO optimization by the underlying file system implementation + // + // NOTE: RequestCachedFileIO / RequestCachingFileIO is touching slabs twice - // wait for all IO issued so far but asynchronously + while ( CRequest* prequestIO = pctls->IlIORangeLocked().PrevMost() ) + { + RequestCachedFileIO( prequestIO ); - while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) - { - WaitForPendingIOAsync( pctls, prequestIO ); + pctls->IlIORangeLocked().Remove( prequestIO ); + pctls->IlCachedFileIORequested().InsertAsNextMost( prequestIO ); + } - pctls->IlCachingFileIORequested().Remove( prequestIO ); - pctls->IlIOPending().InsertAsNextMost( prequestIO ); - } + while ( CRequest* prequestIO = pctls->IlCachedFileIORequested().PrevMost() ) + { + RequestCachingFileIO( prequestIO ); - // for each pending IO, check for any IOs that are complete + pctls->IlCachedFileIORequested().Remove( prequestIO ); + pctls->IlCachingFileIORequested().InsertAsNextMost( prequestIO ); + } - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlIOPending().PrevMost(); - prequestIO; - prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlIOPending().Next( prequestIO ); + // wait for all IO issued so far but asynchronously - if ( FCompletedIO( prequestIO ) ) + while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) { - pctls->IlIOPending().Remove( prequestIO ); - pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); + WaitForPendingIOAsync( pctls, prequestIO ); + + pctls->IlCachingFileIORequested().Remove( prequestIO ); + pctls->IlIOPending().InsertAsNextMost( prequestIO ); } - } - // for each completed IO, finalize the IO in the cache + // for each pending IO, check for any IOs that are 
complete - while ( CRequest* prequestIO = pctls->IlIOCompleted().PrevMost() ) - { - RequestFinalizeIO( prequestIO ); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlIOPending().PrevMost(); + prequestIO; + prequestIO = prequestIONext ) + { + prequestIONext = pctls->IlIOPending().Next( prequestIO ); - pctls->IlIOCompleted().Remove( prequestIO ); - pctls->IlFinalizeIORequested().InsertAsNextMost( prequestIO ); - } + if ( FCompletedIO( prequestIO ) ) + { + pctls->IlIOPending().Remove( prequestIO ); + pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); - // wait for all IO issued so far but asynchronously + ClearIOCompletion( prequestIO ); + } + } - while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) - { - WaitForPendingIOAsync( pctls, prequestIO ); + // for each completed IO, finalize the IO in the cache - pctls->IlFinalizeIORequested().Remove( prequestIO ); - pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); - } + while ( CRequest* prequestIO = pctls->IlIOCompleted().PrevMost() ) + { + RequestFinalizeIO( prequestIO ); - // for each pending finalize IO, check for any IOs that are complete + pctls->IlIOCompleted().Remove( prequestIO ); + pctls->IlFinalizeIORequested().InsertAsNextMost( prequestIO ); + } - prequestIONext = NULL; - for ( CRequest* prequestIO = pctls->IlFinalizeIOPending().PrevMost(); - prequestIO; - prequestIO = prequestIONext ) - { - prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); + // wait for all IO issued so far but asynchronously - if ( FCompletedIO( prequestIO ) ) + while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) { - pctls->IlFinalizeIOPending().Remove( prequestIO ); - pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); + WaitForPendingIOAsync( pctls, prequestIO ); + + pctls->IlFinalizeIORequested().Remove( prequestIO ); + pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); } - } - // for each finalized IO, release the IO range 
locks and trigger the IO completion + // for each pending finalize IO, check for any IOs that are complete - while ( CRequest* prequestIO = pctls->IlFinalizeIOCompleted().PrevMost() ) - { - pctls->IlFinalizeIOCompleted().Remove( prequestIO ); + prequestIONext = NULL; + for ( CRequest* prequestIO = pctls->IlFinalizeIOPending().PrevMost(); + prequestIO; + prequestIO = prequestIONext ) + { + prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); - ReleaseIORangeLock( prequestIO, pctls ); + if ( FCompletedIO( prequestIO ) ) + { + pctls->IlFinalizeIOPending().Remove( prequestIO ); + pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); + } + } - CRequest* prequestNext = NULL; - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestNext ) + // for each finalized IO, release the IO range locks and trigger the IO completion + + while ( CRequest* prequestIO = pctls->IlFinalizeIOCompleted().PrevMost() ) { - prequestNext = prequestIO->IlRequestsByIO().Next( prequest ); + pctls->IlFinalizeIOCompleted().Remove( prequestIO ); - prequestIO->IlRequestsByIO().Remove( prequest ); + ReleaseIORangeLock( prequestIO, pctls ); - if ( prequest != prequestIO ) + CRequest* prequestNext = NULL; + for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); + prequest; + prequest = prequestNext ) { - pctls->RemoveRequest( prequest ); + prequestNext = prequestIO->IlRequestsByIO().Next( prequest ); + + prequestIO->IlRequestsByIO().Remove( prequest ); + + if ( prequest != prequestIO ) + { + pctls->RemoveRequest( prequest ); + } } - } - pctls->RemoveRequest( prequestIO ); - } + pctls->RemoveRequest( prequestIO ); + } - // if we failed to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again + // if we failed to get an IO Range Lock and we currently have no IO Range Locks then we should try to issue again - if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) - { - 
pctls->CueAsyncIOWorker(); + if ( fIORangeLockFailure && pctls->CIORangeLocked() == 0 ) + { + pctls->CueAsyncIOWorker(); + } } - - pctls->EndAsyncIOWorker(); } template @@ -8463,6 +8561,7 @@ ERR THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // wait for all the IO to complete even if it is not needed for finalization WaitForPendingIO( prequest ); + ClearIOCompletion( prequest ); // finalize the IO in the cache @@ -8673,7 +8772,7 @@ void THashedLRUKCache::WaitForPendingIOAsync( _In_ CHashedLRUKCacheThreadL prequest; prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) { - prequest->WaitForIO( CHashedLRUKCacheThreadLocalStorage::CueAsyncIOWorker_, (DWORD_PTR)pctls ); + prequest->WaitForIO( pctls ); } } @@ -8695,7 +8794,7 @@ BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) prequest; prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) { - if ( !prequest->FIOCompleted() ) + if ( !prequest->FWaitForIOCompleted() ) { return fFalse; } @@ -8704,6 +8803,17 @@ BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) return fTrue; } +template +void THashedLRUKCache::ClearIOCompletion( _In_ CRequest* const prequestIO ) +{ + for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); + prequest; + prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) + { + prequest->ResetWaitForIO(); + } +} + template void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx index 4a2a1b14..ff400c32 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx @@ -17,8 +17,7 @@ class CHashedLRUKCacheThreadLocalStorage // ctls : CCacheThreadLocalStorageBase( ctid ), m_pc( NULL ), m_ptpwIssue( NULL ), - m_semAsyncIOWorkerRequest( CSyncBasicInfo( 
"CHashedLRUKCacheThreadLocalStorage::m_semAsyncIOWorkerRequest" ) ), - m_semAsyncIOWorkerExecute( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_semAsyncIOWorkerExecute" ) ), + m_cwAsyncIOWorkerState( ControlWord::cwNone ), m_ctidAsyncIOWorker( ctidInvalid ), m_critAsyncIOWorkerState( CLockBasicInfo( CSyncBasicInfo( "CHashedLRUKCacheThreadLocalStorage::m_critAsyncIOWorkerState" ), rankIssued, 0 ) ), m_rgibSlab { 0 }, @@ -27,9 +26,6 @@ class CHashedLRUKCacheThreadLocalStorage // ctls m_cIORangeLocked( 0 ), m_cbIORangeLocked( 0 ) { - m_semAsyncIOWorkerRequest.Release(); - m_semAsyncIOWorkerRequest.Release(); - m_semAsyncIOWorkerExecute.Release(); } void Initialize( _In_ THashedLRUKCache* const pc, _Inout_ TP_WORK** const pptpwIssue ) @@ -186,36 +182,73 @@ class CHashedLRUKCacheThreadLocalStorage // ctls void BeginAsyncIOWorker() { - // serialize execution of the async IO worker because more than one can be requested and executing concurrently - - m_semAsyncIOWorkerExecute.Acquire(); m_ctidAsyncIOWorker = CtidCurrentThread(); + + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s BeginAsyncIOWorker .%x", + OSFormatFileId( Pc() ), + Ctid() ) ); } - void EndAsyncIOWorker() + BOOL FTryEndAsyncIOWorker() { - // enable another async IO worker task to execute + BOOL fNewRequest = fFalse; - m_ctidAsyncIOWorker = ctidInvalid; - m_semAsyncIOWorkerExecute.Release(); + // if the worker is requested and running then clear the requested state + // if the worker is not requested and running then clear the running state - // allow another async IO worker request to be made + OSSYNC_FOREVER + { + const ControlWord cwAsyncIOWorkerStateBIExpected = (ControlWord)AtomicRead( (LONG*)&m_cwAsyncIOWorkerState ); + const ControlWord cwAsyncIOWorkerStateAI = cwAsyncIOWorkerStateBIExpected == ControlWord::cwRunning ? 
+ ControlWord::cwNone : + ControlWord::cwRunning; + const ControlWord cwAsyncIOWorkerStateBI = (ControlWord)AtomicCompareExchange( (LONG*)&m_cwAsyncIOWorkerState, + (LONG)cwAsyncIOWorkerStateBIExpected, + (LONG)cwAsyncIOWorkerStateAI ); + + if ( cwAsyncIOWorkerStateBI == cwAsyncIOWorkerStateBIExpected ) + { + fNewRequest = cwAsyncIOWorkerStateBI == ControlWord::cwRequestedAndRunning; + break; + } + } - m_semAsyncIOWorkerRequest.Release(); + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s FTryEndAsyncIOWorker .%x = %s", + OSFormatFileId( Pc() ), + Ctid(), + OSFormatBoolean( !fNewRequest ) ) ); - // release the ref count for this request + // if we are no longer running then release the ref count for the async IO worker - CHashedLRUKCacheThreadLocalStorage* pctlsT = this; - Release( &pctlsT ); + if ( !fNewRequest ) + { + CHashedLRUKCacheThreadLocalStorage* pctlsT = this; + Release( &pctlsT ); + } + + // we have succeeded in ending if there is no new request + + return !fNewRequest; } void CueAsyncIOWorker() { - // try to get a token to request the async IO worker + // request the async IO worker + + const ControlWord cwAsyncIOWorkerStateBI = (ControlWord)AtomicExchange( (LONG*)&m_cwAsyncIOWorkerState, + (LONG)ControlWord::cwRequestedAndRunning ); + + const BOOL fNewRequest = ( cwAsyncIOWorkerStateBI == ControlWord::cwNone || + cwAsyncIOWorkerStateBI == ControlWord::cwRunning ); + const BOOL fSignalNeeded = cwAsyncIOWorkerStateBI == ControlWord::cwNone; - if ( m_semAsyncIOWorkerRequest.FTryAcquire() ) + // signal the async IO worker if necessary + + if ( fSignalNeeded ) { - // add a ref count for this request + // add a ref count for the async IO worker AddRef(); @@ -223,6 +256,13 @@ class CHashedLRUKCacheThreadLocalStorage // ctls SubmitThreadpoolWork( PtpwIssue() ); } + + OSTrace( JET_tracetagBlockCacheOperations, + OSFormat( "C=%s CueAsyncIOWorker .%x fNewRequest = %s fSignalNeeded = %s", + OSFormatFileId( Pc() ), + Ctid(), + OSFormatBoolean( 
fNewRequest ), + OSFormatBoolean( fSignalNeeded ) ) ); } static void CueAsyncIOWorker_( _In_ const DWORD_PTR keyIOComplete ) @@ -241,21 +281,26 @@ class CHashedLRUKCacheThreadLocalStorage // ctls ~CHashedLRUKCacheThreadLocalStorage() { - m_semAsyncIOWorkerRequest.Acquire(); - m_semAsyncIOWorkerRequest.Acquire(); - m_semAsyncIOWorkerExecute.Acquire(); - m_pc->ReleaseThreadpoolState( &m_ptpwIssue ); } + private: + + enum class ControlWord : LONG + { + cwNone = 0, + cwRequested = 1, + cwRunning = 2, + cwRequestedAndRunning = 3, + }; + private: THashedLRUKCache* m_pc; TP_WORK* m_ptpwIssue; CCountedInvasiveList m_ilIORequested; - CSemaphore m_semAsyncIOWorkerRequest; - CSemaphore m_semAsyncIOWorkerExecute; + volatile ControlWord m_cwAsyncIOWorkerState; CacheThreadId m_ctidAsyncIOWorker; CCountedInvasiveList m_ilIORangeLockPending; CCountedInvasiveList m_ilIORangeLocked; From 3279bf7cae627dcdd5d07dc0d59308cc3db481c7 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Wed, 26 Oct 2022 09:33:05 +0000 Subject: [PATCH 077/102] Restructuring missed protecting the UlParam call against NULL pinst for instance specific parameter. 
[Substrate:3ffa3f3be28f0ae99c2833871044da83b20162c1] --- dev/ese/src/ese/_osu/hapublishu.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index 4a564121..807cae40 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -34,7 +34,7 @@ void OSUHAPublishEvent_( fEmit = fFalse; } - if ( !UlParam( pinst, JET_paramEnableHaPublish ) ) + if ( pinstNil != pinst && !UlParam( pinst, JET_paramEnableHaPublish ) ) { // might be nice to Assert/FireWall not O365 Datacenter / Store.worker, but a bit of a layer violation fEmit = fFalse; From ea819cfe39df14cadd662d529de01c64f8987c5a Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 27 Oct 2022 15:30:11 +0000 Subject: [PATCH 078/102] ESE Block Cache: Perf: enable LZ4 compression for Journal Entries [Substrate:624764a0a842e37cc9bd245e362f9f72b6316b7f] --- dev/ese/src/os/blockcache/_journalentry.hxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/os/blockcache/_journalentry.hxx b/dev/ese/src/os/blockcache/_journalentry.hxx index b95e1570..3b6d7b39 100644 --- a/dev/ese/src/os/blockcache/_journalentry.hxx +++ b/dev/ese/src/os/blockcache/_journalentry.hxx @@ -232,7 +232,7 @@ INLINE ERR TCompressedJournalEntry::ErrCreate( _In_ cons // determine our compression algorithm - ca = caLegacyXpressHuffman; + ca = caLz4; // compress the data From 17f3ad3f51e871145993975b6ea6ae85b145de2f Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Fri, 28 Oct 2022 14:42:35 +0000 Subject: [PATCH 079/102] Delete RCE in logical rather than allocation order On passives, RCEs can be allocated out of order because of deferred RCEs which are only allocated later if needed. Cleaning them up in that allocation order can result in RCE chains during the cleanup which are illegal. So, need to clean them up in logical order. 
Also added some asserts to make sure deferred RCEs can only be used while in the initial required range. [Substrate:c84b5929fb7e2e358ed7d8a2fbc84b9dcb4ee810] --- dev/ese/src/ese/_log/logredo.cxx | 4 +++ dev/ese/src/ese/pib.cxx | 10 ++++++ dev/ese/src/ese/ver.cxx | 54 +++++++++++++++++++++++++++++++- dev/ese/src/inc/pib.hxx | 3 ++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index e0124328..c8f06f4a 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -3203,6 +3203,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) } // remove this RCE from the list of uncreated RCEs + Assert( !ppib->FDeferredRceid( plrundoinfo->le_rceid ) || g_rgfmp[ifmp].FContainsDataFromFutureLogs() ); Call( ppib->ErrDeregisterDeferredRceid( plrundoinfo->le_rceid ) ); Assert( trxOld == plrundoinfo->le_trxBegin0 ); @@ -3491,6 +3492,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) // add this RCE to the list of uncreated RCEs if ( plrfiard->FVersioned() ) { + Assert( g_rgfmp[ifmp].FContainsDataFromFutureLogs() ); Call( ppib->ErrRegisterDeferredRceid( plrfiard->le_rceidReplace, pgno ) ); } @@ -3511,6 +3513,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) // add this RCE to the list of uncreated RCEs if ( plrreplace->FVersioned() ) { + Assert( g_rgfmp[ifmp].FContainsDataFromFutureLogs() ); Call( ppib->ErrRegisterDeferredRceid( plrnode->le_rceid, pgno ) ); } @@ -3636,6 +3639,7 @@ ERR LOG::ErrLGRIRedoNodeOperation( const LRNODE_ *plrnode, ERR *perr ) // add this RCE to the list of uncreated RCEs if ( plrflagdelete->FVersioned() ) { + Assert( g_rgfmp[ifmp].FContainsDataFromFutureLogs() ); Call( ppib->ErrRegisterDeferredRceid( plrnode->le_rceid, pgno ) ); } diff --git a/dev/ese/src/ese/pib.cxx b/dev/ese/src/ese/pib.cxx index ff695e26..7422cb69 100644 --- a/dev/ese/src/ese/pib.cxx +++ b/dev/ese/src/ese/pib.cxx 
@@ -271,6 +271,16 @@ VOID PIB::AssertNoDeferredRceid() const AssertRTL( m_redblacktreeRceidDeferred.FEmpty() ); } +#ifdef DEBUG +// ================================================================ +ERR PIB::FDeferredRceid( const RCEID& rceid ) +// ================================================================ +{ + Assert( rceidNull != rceid ); + return ( CRedBlackTree::ERR::errSuccess == m_redblacktreeRceidDeferred.ErrFind( rceid )); +} +#endif + // ================================================================ ERR PIB::ErrRegisterRceid( const RCEID rceid, RCE * const prce) // ================================================================ diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index 050fa6b5..5dd9557d 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -1218,7 +1218,29 @@ INLINE VOID RCE::SetPrcePrevOfNode( RCE * prce ) Assert( FAssertRwlHashAsWriter_() ); Assert( prceNil == prce || RceidCmp( m_rceid, prce->Rceid() ) > 0 ); - m_prcePrevOfNode = prce; +#ifdef DEBUG + if ( prce ) + { + const BOOL fPrevRCEIsDelete = ( operFlagDelete == prce->m_oper && !prce->FMoved() ); + if ( fPrevRCEIsDelete ) + { + switch ( m_oper ) + { + case operInsert: + case operPreInsert: + case operWriteLock: + // these are the only valid operations after a delete + break; + + default: + { + Assert( fFalse ); + } + } + } + } +#endif + m_prcePrevOfNode = prce; } @@ -5994,6 +6016,36 @@ ERR RCE::ErrPrepareToDeallocate( TRX trxOldest ) FCB * const pfcb = prce->Pfcb(); ENTERCRITICALSECTION enterCritFCBRCEList( &( pfcb->CritRCEList() ) ); + // Except for recovery, this should be the HEAD RCE for this node. Recovery can + // allocate RCEs out of order because of deferred RCEs, so make sure to still clean + // them up in order. 
+ Assert( PinstFromIfmp( m_ifmp )->FRecovering() || m_prcePrevOfNode == NULL ); + if ( PinstFromIfmp( m_ifmp )->FRecovering() ) + { + RCE *prceFirst = this; + while ( prceFirst->m_prcePrevOfNode != NULL ) + { + Assert( prceFirst->m_prcePrevOfNode->m_prceNextOfNode == prceFirst ); + prceFirst = prceFirst->m_prcePrevOfNode; + } + + while ( prceFirst != this ) + { + RCE *prceNext = prceFirst->PrceNextOfNode(); + + ASSERT_VALID( prceFirst ); + Assert( !prceFirst->FOperNull() ); + // Only deferrable oper's like FlagDelete/Replace should end up out-of-order + Assert( prceFirst->Oper() == operFlagDelete || prceFirst->Oper() == operReplace ); + Assert( prceFirst->FFullyCommitted() ); + Assert( TrxCmp( prceFirst->TrxCommitted(), trxOldest ) < 0 ); + + VERINullifyCommittedRCE( prceFirst ); + + prceFirst = prceNext; + } + } + do { RCE *prceNext; diff --git a/dev/ese/src/inc/pib.hxx b/dev/ese/src/inc/pib.hxx index 5be50fd0..6ddfbdee 100644 --- a/dev/ese/src/inc/pib.hxx +++ b/dev/ese/src/inc/pib.hxx @@ -511,6 +511,9 @@ public: ERR ErrDeregisterDeferredRceid( const RCEID& ); VOID RemoveAllDeferredRceid(); VOID AssertNoDeferredRceid() const; +#ifdef DEBUG + BOOL FDeferredRceid( const RCEID& ); +#endif ERR ErrRegisterRceid( const RCEID rceid, RCE * const prce); ERR ErrDeregisterRceid( const RCEID rceid ); From effbe46b3402a7b8ffd3d0ef30551dc7fb96b6d7 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Mon, 31 Oct 2022 21:42:20 +0000 Subject: [PATCH 080/102] ESE Block Cache: improve IO efficiency and general cleanup of CRequest The largest impact of this change is that we change how CRequests are processed by the AsyncIOWorker so that it handles IO handshaking via just the master CRequest per IO rather than checking all CRequests in the IO. This results in the AsyncIOWorker being cued far less often (once per IO rather than once per CRequest). For similar reasons, we also call IFileAPI::ErrIOIssue less often. 
Other changes: - CRequest now directly owns the pointer to the associated TLS (if async) and manages its lifecycle. this also improves debuggability - CRequest has more debug tracking state for request and issue of IO - CRequest allocation is now pooled - CRequest::WaitForIORangeLock now directly cues the AsyncIOWorker via the TLS rather than using an abstracted delegate - TPool cleanup was becoming unmanageable so a static self registration scheme was added to enable cleanup of all TPool instances with a single call [Substrate:afb3521f0b279885728c7c49ae68cffd687c6ba7] --- dev/ese/src/os/blockcache/_common.hxx | 83 ++- dev/ese/src/os/blockcache/_filefilter.hxx | 12 - .../src/os/blockcache/_filefilterwrapper.hxx | 2 - dev/ese/src/os/blockcache/_filewrapper.hxx | 7 - .../src/os/blockcache/_hashedlrukcache.hxx | 489 +++++++++--------- .../_hashedlrukcachethreadlocalstorage.hxx | 12 - dev/ese/src/os/osblockcache.cxx | 3 +- 7 files changed, 326 insertions(+), 282 deletions(-) diff --git a/dev/ese/src/os/blockcache/_common.hxx b/dev/ese/src/os/blockcache/_common.hxx index 8c1808eb..53d95df5 100644 --- a/dev/ese/src/os/blockcache/_common.hxx +++ b/dev/ese/src/os/blockcache/_common.hxx @@ -327,6 +327,53 @@ INLINE const char* OSFormatFileId( _In_ ICache* const pc ) // Pool of objects with a minimum lifetime +template< class T = void > +class TStateBase +{ + public: + + static void CleanupAll() + { + for ( TStateBase* pstate = s_il.PrevMost(); pstate; pstate = s_il.Next( pstate ) ) + { + pstate->CleanupThis(); + } + } + + protected: + + TStateBase() + { + s_il.InsertAsNextMost( this ); + } + + virtual ~TStateBase() + { + s_il.Remove( this ); + } + + virtual void CleanupThis() {} + + static SIZE_T OffsetOfILE() { return OffsetOf( TStateBase, m_ile ); } + + private: + + static CInvasiveList, TStateBase::OffsetOfILE> s_il; + + typename CInvasiveList, TStateBase::OffsetOfILE>::CElement m_ile; +}; + +template< class T > +CInvasiveList, TStateBase::OffsetOfILE> TStateBase::s_il; + 
+class CStateBase : public TStateBase<> +{ +}; + +class CPoolRepository : public CStateBase +{ +}; + template< class T, BOOL fHeap = fTrue, TICK dtickMin = 10 * 1000 > class TPool { @@ -341,20 +388,20 @@ class TPool { void* pv = NULL; - if ( s_state.FInit() && s_state.m_il.PrevMost() ) + if ( s_state.FInit() && s_state.Il().PrevMost() ) { - s_state.m_crit.Enter(); + s_state.Crit().Enter(); - CHeader* pheader = s_state.m_il.PrevMost(); + CHeader* pheader = s_state.Il().PrevMost(); pheader = pheader && pheader->Cb() >= cb ? pheader : NULL; if ( pheader ) { - s_state.m_il.Remove( pheader ); + s_state.Il().Remove( pheader ); } - s_state.m_crit.Leave(); + s_state.Crit().Leave(); if ( pheader ) { @@ -405,23 +452,23 @@ class TPool pv = NULL; } - s_state.m_crit.Enter(); + s_state.Crit().Enter(); if ( pheader ) { - s_state.m_il.InsertAsPrevMost( pheader ); + s_state.Il().InsertAsPrevMost( pheader ); pheader = NULL; } - while ( s_state.m_il.NextMost() && s_state.m_il.NextMost()->FRelease() ) + while ( s_state.Il().NextMost() && s_state.Il().NextMost()->FRelease() ) { - pheader = s_state.m_il.NextMost(); - s_state.m_il.Remove( pheader ); + pheader = s_state.Il().NextMost(); + s_state.Il().Remove( pheader ); il.InsertAsNextMost( pheader ); pheader = NULL; } - s_state.m_crit.Leave(); + s_state.Crit().Leave(); s_state.Release( il ); @@ -430,7 +477,7 @@ class TPool static void Cleanup() { - s_state.Release( s_state.m_il ); + s_state.CleanupThis(); } private: @@ -487,7 +534,7 @@ class TPool private: - class CState + class CState : CStateBase { public: @@ -500,11 +547,19 @@ class TPool ~CState() { m_fInit = fFalse; + CleanupThis(); + } + + void CleanupThis() override + { Release( m_il ); } BOOL FInit() const { return m_fInit; } + CCriticalSection& Crit() { return m_crit; } + CCountedInvasiveList& Il() { return m_il; } + static void Release( CInvasiveList& il ) { while ( CHeader* const pheader = il.PrevMost() ) @@ -516,6 +571,8 @@ class TPool } } + private: + BOOL m_fInit; 
CCriticalSection m_crit; typename CCountedInvasiveList m_il; diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 5b5df9cc..0cc11aea 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -893,11 +893,6 @@ class TFileFilter // ff CCountedInvasiveList& IlRequestsByIO() { return m_ilRequestsByIO; } - static void Cleanup() - { - CPool::Cleanup(); - } - private: const VolumeId m_volumeid; @@ -2204,11 +2199,6 @@ class TFileFilter // ff m_fReleaseWriteback = fFalse; } - static void Cleanup() - { - CPool::Cleanup(); - } - static void Complete_( _In_ const ERR err, _In_ const VolumeId volumeid, _In_ const FileId fileid, @@ -2429,8 +2419,6 @@ TFileFilter::~TFileFilter() template void TFileFilter::Cleanup() { - CThreadLocalStorage::Cleanup(); - CIOComplete::Cleanup(); CThreadLocalStorageRepository::Cleanup(); CThrottleContextRepository::Cleanup(); } diff --git a/dev/ese/src/os/blockcache/_filefilterwrapper.hxx b/dev/ese/src/os/blockcache/_filefilterwrapper.hxx index e3d4fef4..8682c3ec 100644 --- a/dev/ese/src/os/blockcache/_filefilterwrapper.hxx +++ b/dev/ese/src/os/blockcache/_filefilterwrapper.hxx @@ -339,8 +339,6 @@ class CFileFilterWrapper : public TFileFilterWrapper : TFileFilterWrapper( pff, iom ) { } - - static void Cleanup() { CIOComplete::Cleanup(); } }; diff --git a/dev/ese/src/os/blockcache/_filewrapper.hxx b/dev/ese/src/os/blockcache/_filewrapper.hxx index 06a1f105..c9936dc1 100644 --- a/dev/ese/src/os/blockcache/_filewrapper.hxx +++ b/dev/ese/src/os/blockcache/_filewrapper.hxx @@ -168,11 +168,6 @@ class TFileWrapper // fw CPool::Free( &pvT ); } - static void Cleanup() - { - CPool::Cleanup(); - } - protected: virtual ~CIOComplete() @@ -839,6 +834,4 @@ class CFileWrapper : public TFileWrapper } virtual ~CFileWrapper() {} - - static void Cleanup() { CIOComplete::Cleanup(); } }; diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx 
b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 58f74186..28db010d 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -105,17 +105,18 @@ class THashedLRUKCache { public: - CRequest( _In_ const BOOL fRead, - _In_ THashedLRUKCache* const pc, - _In_ const TraceContext& tc, - _Inout_ CHashedLRUKCachedFileTableEntry** const ppcfte, - _In_ const QWORD ibOffset, - _In_ const DWORD cbData, - _In_reads_( cbData ) const BYTE* const pbData, - _In_ const OSFILEQOS grbitQOS, - _In_ const ICache::CachingPolicy cp, - _In_opt_ const ICache::PfnComplete pfnComplete, - _In_opt_ const DWORD_PTR keyComplete ) + CRequest( _In_ const BOOL fRead, + _In_ THashedLRUKCache* const pc, + _In_ const TraceContext& tc, + _Inout_ CHashedLRUKCachedFileTableEntry** const ppcfte, + _In_ const QWORD ibOffset, + _In_ const DWORD cbData, + _In_reads_( cbData ) const BYTE* const pbData, + _In_ const OSFILEQOS grbitQOS, + _In_ const ICache::CachingPolicy cp, + _In_opt_ const ICache::PfnComplete pfnComplete, + _In_opt_ const DWORD_PTR keyComplete, + _Inout_opt_ CHashedLRUKCacheThreadLocalStorage** const ppctls ) : THashedLRUKCacheBase::CRequest( fRead, pc, tc, @@ -127,6 +128,7 @@ class THashedLRUKCache cp, pfnComplete, keyComplete ), + m_pctls( *ppctls ), m_rgcwcc { CClusterWriteCompletionContext( this, (CMeteredSection::Group)0 ), @@ -136,19 +138,40 @@ class THashedLRUKCache m_cCachedFileIO( 0 ), m_fCacheHit( fFalse ), m_cCachingFileIO( 0 ), + m_prequestIO( NULL ), m_iorl( this ), - m_pfnIORangeLockAcquired( NULL ), - m_keyIORangeLockAcquired( NULL ), m_fIORangeLockAcquired( fFalse ), + m_fCachedFileIORequested( fFalse ), + m_fCachingFileIORequested( fFalse ), m_fIORequested( fFalse ), m_cIOPending( 0 ), - m_pctls( NULL ), - m_pmsig( NULL ), + m_fCachedFileIOIssued( fFalse ), + m_fCachingFileIOIssued( fFalse ), + m_pmsigWaitForIOCompleted( NULL ), m_fWaitForIOCompleted( fFalse ) { - m_ilRequestsByIO.InsertAsPrevMost( this ); + 
*ppctls = NULL; + AddRequestToIOAsPrevMost( this ); } +#pragma push_macro( "new" ) +#undef new + + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); + } + +#pragma pop_macro( "new" ) + BOOL FRead() const { return THashedLRUKCacheBase::CRequest::FRead(); } THashedLRUKCache* Pc() const { return (THashedLRUKCache*)THashedLRUKCacheBase::CRequest::Pc(); } CHashedLRUKCachedFileTableEntry* Pcfte() const { return THashedLRUKCacheBase::CRequest::Pcfte(); } @@ -157,6 +180,8 @@ class THashedLRUKCache OSFILEQOS GrbitQOS() const { return THashedLRUKCacheBase::CRequest::GrbitQOS(); } ICache::CachingPolicy Cp() const { return THashedLRUKCacheBase::CRequest::Cp(); } double PctWrite() const { return THashedLRUKCacheBase::CRequest::PctWrite(); } + BOOL FSync() const { return THashedLRUKCacheBase::CRequest::FSync(); } + CHashedLRUKCacheThreadLocalStorage* Pctls() const { return m_pctls; } ERR ErrStatus() const { return THashedLRUKCacheBase::CRequest::ErrStatus(); } typename CHashedLRUKCachedFileTableEntry::CIORangeLockBase* Piorl() { return &m_iorl; } @@ -166,27 +191,23 @@ class THashedLRUKCache COffsets OffsetsForIO() const { - CRequest* const prequestIOFirst = m_ilRequestsByIO.PrevMost(); + Assert( this == PrequestIO() ); + + CRequest* const prequestIOFirst = IlRequestsByIO().PrevMost(); const QWORD ibStartIOFirst = prequestIOFirst->Offsets().IbStart(); - CRequest* const prequestIOLast = m_ilRequestsByIO.NextMost(); + CRequest* const prequestIOLast = IlRequestsByIO().NextMost(); const QWORD ibEndIOLast = prequestIOLast->Offsets().IbEnd(); return COffsets( ibStartIOFirst, ibEndIOLast ); } - typedef void (*PfnIORangeLockAcquired)( const DWORD_PTR dwCompletionKey ); - - void WaitForIORangeLock( _In_opt_ CRequest::PfnIORangeLockAcquired pfnIORangeLockAcquired = NULL, - _In_opt_ const DWORD_PTR keyIORangeLockAcquired = NULL ) + void 
WaitForIORangeLock() { - if ( pfnIORangeLockAcquired ) - { - // save the completion context - - m_pfnIORangeLockAcquired = pfnIORangeLockAcquired; - m_keyIORangeLockAcquired = keyIORangeLockAcquired; + Assert( this == PrequestIO() ); - // request the IO range lock, calling the completion when acquired + if ( !FSync() ) + { + // request the IO range lock async which will be completed via our CIORangeLock::Grant impl const BOOL fIORangeLockPending = Pcfte()->FTryRequestIORangeLock( Piorl() ); EnforceSz( fIORangeLockPending, "WaitForIORangeLock" ); @@ -199,20 +220,20 @@ class THashedLRUKCache } } - void WaitForIO( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ) + void WaitForIO() { CManualResetSignal msig( CSyncBasicInfo( "THashedLRUKCache::CRequest::WaitForIO::msig" ) ); + Assert( this == PrequestIO() ); Assert( !m_fWaitForIOCompleted ); IssueIO(); // setup to wait for the IO to complete either sync or async - m_pctls = pctls; - m_pmsig = pctls ? NULL : &msig; + m_pmsigWaitForIOCompleted = !FSync() ? 
NULL : &msig; - if ( pctls ) + if ( !FSync() ) { msig.Set(); } @@ -228,13 +249,18 @@ class THashedLRUKCache void ResetWaitForIO() { + Assert( this == PrequestIO() ); Assert( FWaitForIOCompleted() ); + m_fCachedFileIORequested = fFalse; + m_fCachedFileIOIssued = fFalse; + m_fCachingFileIORequested = fFalse; + m_fCachingFileIOIssued = fFalse; + m_fIORequested = fFalse; AtomicExchange( (LONG*)&m_cIOPending, 0 ); m_fWaitForIOCompleted = fFalse; - m_pctls = NULL; - m_pmsig = NULL; + m_pmsigWaitForIOCompleted = NULL; } ERR ErrWriteCluster( _In_ ICachedBlockSlab* const pcbs, @@ -245,7 +271,7 @@ class THashedLRUKCache ERR err = JET_errSuccess; const CClusterWriteCompletionContext* pcwcc = PcwccGetClusterCompletionContext(); - Assert( !m_fWaitForIOCompleted ); + Assert( !PrequestIO()->m_fWaitForIOCompleted ); OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Cluster %s", @@ -257,13 +283,14 @@ class THashedLRUKCache Call( pcwcc->ErrWriteCluster( pcbs, slot, cb, rgb ) ); pcwcc = NULL; - m_cCachingFileIO++; + CachingFileIORequest(); // we must immediately issue after writing a cluster to avoid a deadlock in ErrFlushClusters when // this thread must block to access another slab. the cluster write is in TLS and can't be // flushed by ErrFlushClusters. 
this should not hurt IO coalescing in any core scenario - IssueIO(); + CallS( Pc()->PffCaching()->ErrIOIssue() ); + PrequestIO()->m_fCachingFileIOIssued = fTrue; HandleError: if ( pcwcc ) @@ -280,7 +307,7 @@ class THashedLRUKCache { ERR err = JET_errSuccess; - Assert( !m_fWaitForIOCompleted ); + Assert( !PrequestIO()->m_fWaitForIOCompleted ); OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Cluster %s", @@ -297,7 +324,7 @@ class THashedLRUKCache ClusterReadHandoff_ ) ); m_fCacheHit = fTrue; - m_cCachingFileIO++; + CachingFileIORequest(); HandleError: return err; @@ -308,7 +335,7 @@ class THashedLRUKCache ERR err = JET_errSuccess; BYTE* const pbData = (BYTE*)PbData() + ibOffset - Offsets().IbStart(); - Assert( !m_fWaitForIOCompleted ); + Assert( !PrequestIO()->m_fWaitForIOCompleted ); OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Read Block ib=%llu cb=%u", @@ -321,7 +348,7 @@ class THashedLRUKCache Call( ErrRead( Pcfte()->Pff(), ibOffset, cbData, pbData, iomCacheMiss ) ); m_fCacheMiss = fTrue; - m_cCachedFileIO++; + CachedFileIORequest(); HandleError: return err; @@ -332,7 +359,7 @@ class THashedLRUKCache ERR err = JET_errSuccess; const BYTE* const pbData = PbData() + ibOffset - Offsets().IbStart(); - Assert( !m_fWaitForIOCompleted ); + Assert( !PrequestIO()->m_fWaitForIOCompleted ); OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x F=%s Write Block ib=%llu cb=%u", @@ -344,7 +371,7 @@ class THashedLRUKCache Call( ErrWrite( Pcfte()->Pff(), ibOffset, cbData, pbData, iomCacheWriteThrough ) ); - m_cCachedFileIO++; + CachedFileIORequest(); HandleError: return err; @@ -374,7 +401,26 @@ class THashedLRUKCache static SIZE_T OffsetOfIOs() { return OffsetOf( CRequest, m_ileIOs ); } static SIZE_T OffsetOfRequestsByIO() { return OffsetOf( CRequest, m_ileRequestsByIO ); } - CCountedInvasiveList& IlRequestsByIO() { return m_ilRequestsByIO; } + const CCountedInvasiveList& IlRequestsByIO() const { 
return m_ilRequestsByIO; } + CRequest* PrequestIO() const { return m_prequestIO; } + + void RemoveRequestFromIO( _In_ CRequest* const prequest ) + { + m_ilRequestsByIO.Remove( prequest ); + prequest->m_prequestIO = NULL; + } + + void AddRequestToIOAsPrevMost( _In_ CRequest* const prequest ) + { + m_ilRequestsByIO.InsertAsPrevMost( prequest ); + prequest->m_prequestIO = this; + } + + void AddRequestToIOAsNextMost( _In_ CRequest* const prequest ) + { + m_ilRequestsByIO.InsertAsNextMost( prequest ); + prequest->m_prequestIO = this; + } public: @@ -406,71 +452,23 @@ class THashedLRUKCache ~CRequest() { Assert( FWaitForIOCompleted() ); + + CHashedLRUKCacheThreadLocalStorage* pctlsT = m_pctls; + CHashedLRUKCacheThreadLocalStorage::Release( &pctlsT ); } void Start() override { THashedLRUKCacheBase::CRequest::Start(); - AddRefIO(); + PrequestIO()->AddRefIO(); } void Finish( _In_ const ERR err ) override { THashedLRUKCacheBase::CRequest::Finish( err ); - ReleaseRefIO(); - } - - void AddRefIO() - { - m_fIORequested = fTrue; - AtomicIncrement( (LONG*)&m_cIOPending ); - } - - void ReleaseRefIO() - { - if ( AtomicDecrement( (LONG*)&m_cIOPending ) == -1 ) - { - WaitForIOComplete(); - } - } - - void WaitForIOComplete() - { - // cache the completion context - // - // NOTE: this is stable while m_cIOPending != -1 - - CHashedLRUKCacheThreadLocalStorage* pctls = m_pctls; - CManualResetSignal* const pmsig = m_pmsig; - - // if this is an async completion then we must reference the TLS so it isn't released - - if ( pctls ) - { - pctls->AddRef(); - } - - // indicate that the IO is complete - // - // NOTE: we cannot touch the object after this point because it can be released by the - // Async IO Worker immediately - - m_fWaitForIOCompleted = fTrue; - - // do the completion notification - - if ( pctls ) - { - pctls->CueAsyncIOWorker(); - CHashedLRUKCacheThreadLocalStorage::Release( &pctls ); - } - - if ( pmsig ) - { - pmsig->Set(); - } + PrequestIO()->ReleaseRefIO(); } private: @@ -556,12 
+554,12 @@ class THashedLRUKCache { CHashedLRUKCachedFileTableEntry::CIORangeLockBase::Grant(); - if ( m_prequest->m_pfnIORangeLockAcquired ) + m_prequest->m_fIORangeLockAcquired = fTrue; + + if ( !m_prequest->FSync() ) { - m_prequest->m_pfnIORangeLockAcquired( m_prequest->m_keyIORangeLockAcquired ); + m_prequest->Pctls()->CueAsyncIOWorker(); } - - m_prequest->m_fIORangeLockAcquired = fTrue; } private: @@ -625,28 +623,101 @@ class THashedLRUKCache ERR ErrRelease( _In_ const ERR err ) { - IssueIO(); - return THashedLRUKCacheBase::CRequest::ErrRelease( err ); } + void CachingFileIORequest() + { + m_cCachingFileIO++; + PrequestIO()->m_fCachingFileIORequested = fTrue; + } + + void CachedFileIORequest() + { + m_cCachedFileIO++; + PrequestIO()->m_fCachedFileIORequested = fTrue; + } + + void AddRefIO() + { + Assert( this == PrequestIO() ); + + m_fIORequested = fTrue; + AtomicIncrement( (LONG*)&m_cIOPending ); + } + + void ReleaseRefIO() + { + Assert( this == PrequestIO() ); + + if ( AtomicDecrement( (LONG*)&m_cIOPending ) == -1 ) + { + WaitForIOComplete(); + } + } + + void WaitForIOComplete() + { + Assert( this == PrequestIO() ); + + // cache the completion context + // + // NOTE: this is stable while m_cIOPending != -1 + + const BOOL fSync = FSync(); + CHashedLRUKCacheThreadLocalStorage* pctls = Pctls(); + CManualResetSignal* const pmsig = m_pmsigWaitForIOCompleted; + + // if this is an async completion then we must reference the TLS so it isn't released as a side + // effect of releasing this CRequest before we can signal it + + if ( !fSync ) + { + pctls->AddRef(); + } + + // indicate that the IO is complete + // + // NOTE: we cannot touch the object after this point because it can be released by the + // Async IO Worker immediately + + m_fWaitForIOCompleted = fTrue; + + // do the completion notification + + if ( !fSync ) + { + pctls->CueAsyncIOWorker(); + CHashedLRUKCacheThreadLocalStorage::Release( &pctls ); + } + else + { + pmsig->Set(); + } + } + void IssueIO() { - 
if ( m_cCachedFileIO ) + Assert( this == PrequestIO() ); + + if ( m_fCachedFileIORequested ) { - m_cCachedFileIO = 0; + m_fCachedFileIORequested = fFalse; CallS( Pcfte()->Pff()->ErrIssue( FRead() ? iomCacheMiss : iomCacheWriteThrough ) ); + m_fCachedFileIOIssued = fTrue; } - if ( m_cCachingFileIO ) + if ( m_fCachingFileIORequested ) { - m_cCachingFileIO = 0; + m_fCachingFileIORequested = fFalse; CallS( Pc()->PffCaching()->ErrIOIssue() ); + m_fCachingFileIOIssued = fTrue; } } private: + CHashedLRUKCacheThreadLocalStorage* const m_pctls; const CClusterWriteCompletionContext m_rgcwcc[ 2 ]; BOOL m_fCacheMiss; int m_cCachedFileIO; @@ -656,15 +727,17 @@ class THashedLRUKCache typename CCountedInvasiveList::CElement m_ileIOs; CCountedInvasiveList m_ilRequestsByIO; typename CCountedInvasiveList::CElement m_ileRequestsByIO; + CRequest* m_prequestIO; CIORangeLock m_iorl; - CRequest::PfnIORangeLockAcquired m_pfnIORangeLockAcquired; - DWORD_PTR m_keyIORangeLockAcquired; BOOL m_fIORangeLockAcquired; + BOOL m_fCachedFileIORequested; + BOOL m_fCachingFileIORequested; BOOL m_fIORequested; volatile LONG m_cIOPending; + BOOL m_fCachedFileIOIssued; + BOOL m_fCachingFileIOIssued; + CManualResetSignal* m_pmsigWaitForIOCompleted; BOOL m_fWaitForIOCompleted; - CHashedLRUKCacheThreadLocalStorage* m_pctls; - CManualResetSignal* m_pmsig; }; // Update Slab Visitor @@ -4880,7 +4953,7 @@ class THashedLRUKCache ERR ErrFlush(); - ERR ErrEnqueue( _Inout_ CRequest** const pprequest ); + void Enqueue( _Inout_ CRequest** const pprequest ); BOOL FConflicting( _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ); BOOL FCombinable( _In_ CRequest* const prequestIOA, _In_ CRequest* const prequestIOB ); BOOL FOverrideMaxSize( _In_ CRequest* const prequestIO ); @@ -4893,12 +4966,9 @@ class THashedLRUKCache ERR ErrSynchronousIO( _In_ CRequest* const prequest ); void WaitForIORangeLock( _In_ CRequest* const prequest ); - BOOL FWaitForIORangeLock( _In_ CRequest* const prequest, - _In_opt_ 
CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ); - void ReleaseIORangeLock( _In_ CRequest* const prequest, - _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ); - void ReleaseIORangeLockedCounts( _In_ CRequest* const prequest, - _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls = NULL ); + BOOL FWaitForIORangeLock( _In_ CRequest* const prequest ); + void ReleaseIORangeLock( _In_ CRequest* const prequest ); + void ReleaseIORangeLockedCounts( _In_ CRequest* const prequest ); BOOL FAcquireIORangeLockedBySlabCounts( _In_ CRequest* const prequest, _In_ const BOOL fFirstRequest ); void ReleaseIORangeLockedBySlabCounts( _In_ CHashedLRUKCachedFileTableEntry* const pcfte, _In_ const COffsets& offsets ); @@ -4907,11 +4977,6 @@ class THashedLRUKCache void RequestCachedFileIO( _In_ CRequest* const prequestIO ); void RequestCachingFileIO( _In_ CRequest* const prequestIO ); void RequestIO( _In_ CRequest* const prequestIO ); - void WaitForPendingIOAsync( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls, - _In_ CRequest* const prequestIO ); - void WaitForPendingIO( _In_ CRequest* const prequestIO ); - BOOL FCompletedIO( _In_ CRequest* const prequestIO ); - void ClearIOCompletion( _In_ CRequest* const prequestIO ); void RequestFinalizeIO( _In_ CRequest* const prequestIO ); void RequestIO( _In_ CRequest* const prequestIO, @@ -6016,10 +6081,18 @@ ERR THashedLRUKCache::ErrRead( _In_ const TraceContext& _In_opt_ const ICache::PfnComplete pfnComplete, _In_opt_ const DWORD_PTR keyComplete ) { - ERR err = JET_errSuccess; - CHashedLRUKCachedFileTableEntry* pcfte = NULL; - const BOOL fAsync = pfnComplete != NULL; - CRequest* prequest = NULL; + ERR err = JET_errSuccess; + CHashedLRUKCacheThreadLocalStorage* pctls = NULL; + CHashedLRUKCachedFileTableEntry* pcfte = NULL; + const BOOL fAsync = pfnComplete != NULL; + CRequest* prequest = NULL; + + // get our thread local storage if we will be enqueuing requests + + if ( fAsync ) + { + Call( 
ErrGetThreadLocalStorage( &pctls ) ); + } // get the cached file @@ -6041,14 +6114,15 @@ ERR THashedLRUKCache::ErrRead( _In_ const TraceContext& grbitQOS, cp, pfnComplete, - keyComplete ) ); + keyComplete, + &pctls ) ); // if this request is async then we must perform the request async to avoid blocking the request for sync reads to // read our cache state. otherwise, perform it inline to allow a sync request to be performed directly if ( fAsync ) { - Call( ErrEnqueue( &prequest ) ); + Enqueue( &prequest ); } else { @@ -6058,6 +6132,7 @@ ERR THashedLRUKCache::ErrRead( _In_ const TraceContext& HandleError: err = CRequest::ErrRelease( &prequest, err ); ReleaseCachedFile( &pcfte ); + CHashedLRUKCacheThreadLocalStorage::Release( &pctls ); Assert( !FAnyOpenSlab() ); return err; } @@ -6075,10 +6150,18 @@ ERR THashedLRUKCache::ErrWrite( _In_ const TraceContext& _In_opt_ const ICache::PfnComplete pfnComplete, _In_opt_ const DWORD_PTR keyComplete ) { - ERR err = JET_errSuccess; - CHashedLRUKCachedFileTableEntry* pcfte = NULL; - const BOOL fAsync = pfnComplete != NULL; - CRequest* prequest = NULL; + ERR err = JET_errSuccess; + CHashedLRUKCacheThreadLocalStorage* pctls = NULL; + CHashedLRUKCachedFileTableEntry* pcfte = NULL; + const BOOL fAsync = pfnComplete != NULL; + CRequest* prequest = NULL; + + // get our thread local storage if we will be enqueuing requests + + if ( fAsync ) + { + Call( ErrGetThreadLocalStorage( &pctls ) ); + } // get the cached file @@ -6100,14 +6183,15 @@ ERR THashedLRUKCache::ErrWrite( _In_ const TraceContext& grbitQOS, cp, pfnComplete, - keyComplete ) ); + keyComplete, + &pctls ) ); // if this request is async then we must perform the request async to avoid blocking the request for sync reads to // read our cache state. 
otherwise, perform it inline to allow a sync request to be performed directly if ( fAsync ) { - Call( ErrEnqueue( &prequest ) ); + Enqueue( &prequest ); } else { @@ -6117,6 +6201,7 @@ ERR THashedLRUKCache::ErrWrite( _In_ const TraceContext& HandleError: err = CRequest::ErrRelease( &prequest, err ); ReleaseCachedFile( &pcfte ); + CHashedLRUKCacheThreadLocalStorage::Release( &pctls ); Assert( !FAnyOpenSlab() ); return err; } @@ -8143,20 +8228,15 @@ HandleError: } template -ERR THashedLRUKCache::ErrEnqueue( _Inout_ CRequest** const pprequest ) +void THashedLRUKCache::Enqueue( _Inout_ CRequest** const pprequest ) { - ERR err = JET_errSuccess; - CHashedLRUKCacheThreadLocalStorage* pctls = NULL; - CRequest* prequest = *pprequest; - CRequest* prequestIO = NULL; - CRequest* prequestIOPrev = NULL; + CRequest* prequest = *pprequest; + CHashedLRUKCacheThreadLocalStorage* const pctls = prequest->Pctls(); + CRequest* prequestIO = NULL; + CRequest* prequestIOPrev = NULL; *pprequest = NULL; - // get our thread local storage where we are enqueuing requests - - Call( ErrGetThreadLocalStorage( &pctls ) ); - // enqueue the request, combining and ordering with previous requests // // NOTE: we do not handle IO gap coalescing here. 
we leave that to the OS layer underneath us @@ -8174,16 +8254,16 @@ ERR THashedLRUKCache::ErrEnqueue( _Inout_ CRequest** const pprequest ) { while ( CRequest* prequestT = prequestIO->IlRequestsByIO().NextMost() ) { - prequestIO->IlRequestsByIO().Remove( prequestT ); - prequestIOPrev->IlRequestsByIO().InsertAsPrevMost( prequestT ); + prequestIO->RemoveRequestFromIO( prequestT ); + prequestIOPrev->AddRequestToIOAsPrevMost( prequestT ); } } else { while ( CRequest* prequestT = prequestIO->IlRequestsByIO().PrevMost() ) { - prequestIO->IlRequestsByIO().Remove( prequestT ); - prequestIOPrev->IlRequestsByIO().InsertAsNextMost( prequestT ); + prequestIO->RemoveRequestFromIO( prequestT ); + prequestIOPrev->AddRequestToIOAsNextMost( prequestT ); } } @@ -8201,11 +8281,6 @@ ERR THashedLRUKCache::ErrEnqueue( _Inout_ CRequest** const pprequest ) // give ownership of the request to the TLS pctls->AddRequest( &prequest ); - -HandleError: - err = CRequest::ErrRelease( &prequest, err ); - CHashedLRUKCacheThreadLocalStorage::Release( &pctls ); - return err; } template @@ -8394,7 +8469,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage { prequestIONext = pctls->IlIOIssued().Next( prequestIO ); - if ( FWaitForIORangeLock( prequestIO, pctls ) ) + if ( FWaitForIORangeLock( prequestIO ) ) { pctls->IlIOIssued().Remove( prequestIO ); pctls->IlIORangeLockPending().InsertAsNextMost( prequestIO ); @@ -8447,7 +8522,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage while ( CRequest* prequestIO = pctls->IlCachingFileIORequested().PrevMost() ) { - WaitForPendingIOAsync( pctls, prequestIO ); + prequestIO->WaitForIO(); pctls->IlCachingFileIORequested().Remove( prequestIO ); pctls->IlIOPending().InsertAsNextMost( prequestIO ); @@ -8462,12 +8537,12 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage { prequestIONext = pctls->IlIOPending().Next( prequestIO ); - if ( FCompletedIO( prequestIO ) ) + if ( 
prequestIO->FWaitForIOCompleted() ) { pctls->IlIOPending().Remove( prequestIO ); pctls->IlIOCompleted().InsertAsNextMost( prequestIO ); - ClearIOCompletion( prequestIO ); + prequestIO->ResetWaitForIO(); } } @@ -8485,7 +8560,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage while ( CRequest* prequestIO = pctls->IlFinalizeIORequested().PrevMost() ) { - WaitForPendingIOAsync( pctls, prequestIO ); + prequestIO->WaitForIO(); pctls->IlFinalizeIORequested().Remove( prequestIO ); pctls->IlFinalizeIOPending().InsertAsNextMost( prequestIO ); @@ -8500,7 +8575,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage { prequestIONext = pctls->IlFinalizeIOPending().Next( prequestIO ); - if ( FCompletedIO( prequestIO ) ) + if ( prequestIO->FWaitForIOCompleted() ) { pctls->IlFinalizeIOPending().Remove( prequestIO ); pctls->IlFinalizeIOCompleted().InsertAsNextMost( prequestIO ); @@ -8513,7 +8588,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage { pctls->IlFinalizeIOCompleted().Remove( prequestIO ); - ReleaseIORangeLock( prequestIO, pctls ); + ReleaseIORangeLock( prequestIO ); CRequest* prequestNext = NULL; for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); @@ -8522,7 +8597,7 @@ void THashedLRUKCache::AsyncIOWorker( _In_ CHashedLRUKCacheThreadLocalStorage { prequestNext = prequestIO->IlRequestsByIO().Next( prequest ); - prequestIO->IlRequestsByIO().Remove( prequest ); + prequestIO->RemoveRequestFromIO( prequest ); if ( prequest != prequestIO ) { @@ -8560,8 +8635,8 @@ ERR THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // wait for all the IO to complete even if it is not needed for finalization - WaitForPendingIO( prequest ); - ClearIOCompletion( prequest ); + prequest->WaitForIO(); + prequest->ResetWaitForIO(); // finalize the IO in the cache @@ -8569,7 +8644,7 @@ ERR THashedLRUKCache::ErrSynchronousIO( _In_ CRequest* const prequest ) // wait for any 
additional IO caused by finalization - WaitForPendingIO( prequest ); + prequest->WaitForIO(); // release the IO range lock @@ -8585,21 +8660,20 @@ void THashedLRUKCache::WaitForIORangeLock( _In_ CRequest* const prequest ) } template -BOOL THashedLRUKCache::FWaitForIORangeLock( _In_ CRequest* const prequest, - _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls ) +BOOL THashedLRUKCache::FWaitForIORangeLock( _In_ CRequest* const prequest ) { BOOL fFirstRequest = fFalse; BOOL fRelease = fTrue; BOOL fAcquired = fFalse; - if ( !pctls ) + if ( prequest->FSync() ) { fFirstRequest = fTrue; } else { - fFirstRequest = AtomicIncrement( (DWORD*)&pctls->CIORangeLocked() ) == 1; - AtomicAdd( (QWORD*)&pctls->CbIORangeLocked(), prequest->OffsetsForIO().Cb() ); + fFirstRequest = AtomicIncrement( (DWORD*)&prequest->Pctls()->CIORangeLocked() ) == 1; + AtomicAdd( (QWORD*)&prequest->Pctls()->CbIORangeLocked(), prequest->OffsetsForIO().Cb() ); } AtomicExchangeAdd( (LONG*)&m_cIORangeLockedContext, fFirstRequest ? 1 : 0 ); @@ -8613,47 +8687,44 @@ BOOL THashedLRUKCache::FWaitForIORangeLock( _In_ CRequest* const fAcquired = fTrue; fRelease = fFalse; - prequest->WaitForIORangeLock( pctls ? 
CHashedLRUKCacheThreadLocalStorage::CueAsyncIOWorker_ : NULL, - (DWORD_PTR)pctls ); + prequest->WaitForIORangeLock(); } if ( fRelease ) { - ReleaseIORangeLockedCounts( prequest, pctls ); + ReleaseIORangeLockedCounts( prequest ); } return fAcquired; } template -void THashedLRUKCache::ReleaseIORangeLock( _In_ CRequest* const prequest, - _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls ) +void THashedLRUKCache::ReleaseIORangeLock( _In_ CRequest* const prequest ) { prequest->Piorl()->Release(); ReleaseIORangeLockedBySlabCounts( prequest->Pcfte(), prequest->OffsetsForIO() ); - ReleaseIORangeLockedCounts( prequest, pctls ); + ReleaseIORangeLockedCounts( prequest ); } template -void THashedLRUKCache::ReleaseIORangeLockedCounts( _In_ CRequest* const prequest, - _In_opt_ CHashedLRUKCacheThreadLocalStorage* const pctls ) +void THashedLRUKCache::ReleaseIORangeLockedCounts( _In_ CRequest* const prequest ) { BOOL fLastRequest = fFalse; const QWORD cbIORangeLocked = prequest->OffsetsForIO().Cb(); const QWORD cbIORangeLockedNegative = (QWORD)( -( (LONGLONG)cbIORangeLocked ) ); - if ( !pctls ) + if ( prequest->FSync() ) { fLastRequest = fTrue; } else { - Enforce( pctls->CIORangeLocked() >= 1 ); - fLastRequest = AtomicDecrement( (DWORD*)&pctls->CIORangeLocked() ) == 0; - Enforce( pctls->CbIORangeLocked() >= cbIORangeLocked ); - AtomicAdd( (QWORD*)&pctls->CbIORangeLocked(), cbIORangeLockedNegative ); + Enforce( prequest->Pctls()->CIORangeLocked() >= 1 ); + fLastRequest = AtomicDecrement( (DWORD*)&prequest->Pctls()->CIORangeLocked() ) == 0; + Enforce( prequest->Pctls()->CbIORangeLocked() >= cbIORangeLocked ); + AtomicAdd( (QWORD*)&prequest->Pctls()->CbIORangeLocked(), cbIORangeLockedNegative ); } Enforce( m_cIORangeLockedContext >= (DWORD)( fLastRequest ? 
1 : 0 ) ); @@ -8764,56 +8835,6 @@ void THashedLRUKCache::RequestIO( _In_ CRequest* const prequestIO ) RequestIO( prequestIO, fTrue, fTrue ); } -template -void THashedLRUKCache::WaitForPendingIOAsync( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls, - _In_ CRequest* const prequestIO ) -{ - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) - { - prequest->WaitForIO( pctls ); - } -} - -template -void THashedLRUKCache::WaitForPendingIO( _In_ CRequest* const prequestIO ) -{ - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) - { - prequest->WaitForIO(); - } -} - -template -BOOL THashedLRUKCache::FCompletedIO( _In_ CRequest* const prequestIO ) -{ - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) - { - if ( !prequest->FWaitForIOCompleted() ) - { - return fFalse; - } - } - - return fTrue; -} - -template -void THashedLRUKCache::ClearIOCompletion( _In_ CRequest* const prequestIO ) -{ - for ( CRequest* prequest = prequestIO->IlRequestsByIO().PrevMost(); - prequest; - prequest = prequestIO->IlRequestsByIO().Next( prequest ) ) - { - prequest->ResetWaitForIO(); - } -} - template void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx index ff400c32..ff156626 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx @@ -62,8 +62,6 @@ class CHashedLRUKCacheThreadLocalStorage // ctls CritAsyncIOWorkerState().Enter(); m_ilRequests.InsertAsNextMost( prequest ); CritAsyncIOWorkerState().Leave(); - - AddRef(); } void RemoveRequest( _In_ CRequest* const 
prequest ) @@ -72,9 +70,6 @@ class CHashedLRUKCacheThreadLocalStorage // ctls m_ilRequests.Remove( prequest ); CritAsyncIOWorkerState().Leave(); - CHashedLRUKCacheThreadLocalStorage* pctlsT = this; - Release( &pctlsT ); - CRequest* prequestT = prequest; (void)CRequest::ErrRelease( &prequestT, JET_errSuccess ); } @@ -265,13 +260,6 @@ class CHashedLRUKCacheThreadLocalStorage // ctls OSFormatBoolean( fSignalNeeded ) ) ); } - static void CueAsyncIOWorker_( _In_ const DWORD_PTR keyIOComplete ) - { - CHashedLRUKCacheThreadLocalStorage* const pctls = (CHashedLRUKCacheThreadLocalStorage*)keyIOComplete; - - pctls->CueAsyncIOWorker(); - } - static void Release( _Inout_ CHashedLRUKCacheThreadLocalStorage** const ppctls ) { CCacheThreadLocalStorageBase::Release( (CCacheThreadLocalStorageBase** const)ppctls ); diff --git a/dev/ese/src/os/osblockcache.cxx b/dev/ese/src/os/osblockcache.cxx index 06081f1c..ebd08625 100644 --- a/dev/ese/src/os/osblockcache.cxx +++ b/dev/ese/src/os/osblockcache.cxx @@ -337,7 +337,6 @@ BOOL FOSBlockCachePreinit() void OSBlockCachePostterm() { - CFileWrapper::Cleanup(); - CFileFilterWrapper::Cleanup(); CFileFilter::Cleanup(); + CPoolRepository::CleanupAll(); } From 0ee9d1ffed6a7e43b58a1206b8ccfd4beea90228 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Thu, 3 Nov 2022 15:47:57 +0000 Subject: [PATCH 081/102] When doing fCleanOneDb, skip RCE for another database if already skipped earlier RCE in chain [Substrate:87ebc9d674f60a7074df3059c09e6c8d3c78f58d] --- dev/ese/src/ese/ver.cxx | 7 +++++++ dev/ese/src/inc/ver.hxx | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index 5dd9557d..3d53cdee 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -6257,6 +6257,13 @@ ERR VER::ErrVERIRCEClean( const IFMP ifmp ) // Assert( fFalse ); } + + // If we are only cleaning one database, and this RCE belongs to another one and we already + // skipped the first RCE in the node chain, skip this 
one also. + if ( fCleanable && fCleanOneDb && prce->Ifmp() != ifmp && prce->FPastVersionsOfNode() ) + { + fCleanable = fFalse; + } } if ( !fCleanable ) diff --git a/dev/ese/src/inc/ver.hxx b/dev/ese/src/inc/ver.hxx index e12c6dc1..511a8a51 100644 --- a/dev/ese/src/inc/ver.hxx +++ b/dev/ese/src/inc/ver.hxx @@ -521,6 +521,7 @@ class RCE RCE *PrceNextOfSession () const; RCE *PrcePrevOfSession () const; BOOL FFutureVersionsOfNode () const; + BOOL FPastVersionsOfNode () const; RCE *PrceNextOfFCB () const; RCE *PrcePrevOfFCB () const; RCE *PrceUndoInfoNext () const; @@ -1081,6 +1082,14 @@ INLINE BOOL RCE::FFutureVersionsOfNode() const } +// ================================================================ +INLINE BOOL RCE::FPastVersionsOfNode() const +// ================================================================ +{ + return prceNil != m_prcePrevOfNode; +} + + // ================================================================ INLINE LEVEL RCE::Level() const // ================================================================ From eddcb5060916f1fbe9033f2fd8349f889d2eba08 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Fri, 4 Nov 2022 02:21:37 +0000 Subject: [PATCH 082/102] Perf: fix OSTraceEmit to use file append mode This code currently uses a complex scheme with a mutex to sync access to the ESE.TXT trace file. This change opens the file with FILE_APPEND_DATA. This makes any WriteFile call an append. We can then simply just call WriteFile w/o any synchronization to append data. 
[Substrate:28117c8873a8d6c9d02aafae570a146aa5205f91] --- dev/ese/src/os/trace.cxx | 39 ++------------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/dev/ese/src/os/trace.cxx b/dev/ese/src/os/trace.cxx index b80ac891..d4b69be6 100644 --- a/dev/ese/src/os/trace.cxx +++ b/dev/ese/src/os/trace.cxx @@ -369,8 +369,6 @@ void OSFreeInfoStrings() // Tracing -const WCHAR g_wszMutexTrace[] = L"Global\\{5E5C36C0-5E7C-471f-84D7-110FDC1AFD0D}"; -HANDLE g_hMutexTrace = NULL; const WCHAR g_wszFileTrace[] = L"\\Debug\\ESE.TXT"; HANDLE g_hFileTrace = NULL; LOCAL PFNTRACEEMIT g_pfnTraceEmit = NULL; @@ -602,13 +600,7 @@ void __stdcall OSTraceEmit( const TRACETAG tag, const char* const szPrefixNYI, c if ( !g_fJetDebugTracing && g_hFileTrace ) { DWORD cbT; - WaitForSingleObjectEx( g_hMutexTrace, INFINITE, FALSE ); - const LARGE_INTEGER ibOffset = { 0, 0 }; - if ( SetFilePointerEx( g_hFileTrace, ibOffset, NULL, FILE_END ) ) - { - WriteFile( g_hFileTrace, szTrace, min( DWORD( -1 ), cchTrace ), &cbT, NULL ); - } - ReleaseMutex( g_hMutexTrace ); + WriteFile( g_hFileTrace, szTrace, min( DWORD( -1 ), cchTrace ), &cbT, NULL ); } } @@ -1294,11 +1286,6 @@ void OSTraceITerm() CloseHandle( g_hFileTrace ); g_hFileTrace = NULL; } - if ( g_hMutexTrace ) - { - CloseHandle( g_hMutexTrace ); - g_hMutexTrace = NULL; - } if ( g_fcsThreadTableInit ) { DeleteCriticalSection( &g_csThreadTable ); @@ -1313,7 +1300,6 @@ ERR ErrOSTraceIInit() WCHAR wszPathTrace[ cchPathTrace ]; Assert( NULL == g_fcsThreadTableInit ); - Assert( NULL == g_hMutexTrace ); Assert( NULL == g_hFileTrace ); if ( !( g_fcsThreadTableInit = InitializeCriticalSectionAndSpinCount( &g_csThreadTable, 1000 ) ) ) @@ -1325,7 +1311,7 @@ ERR ErrOSTraceIInit() OSStrCbAppendW( wszPathTrace, sizeof(wszPathTrace), g_wszFileTrace ); if ( ( g_hFileTrace = CreateFileW( wszPathTrace, - GENERIC_WRITE, + FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, @@ -1336,25 +1322,9 @@ ERR 
ErrOSTraceIInit() g_hFileTrace = NULL; } - else - { - // The mutex is only used to access the file. If we failed to open the file, then - // don't bother to open the mutex. - Assert( NULL != g_hFileTrace && INVALID_HANDLE_VALUE != g_hFileTrace ); - - if ( !( g_hMutexTrace = CreateMutexW( NULL, FALSE, g_wszMutexTrace ) ) && - !( g_hMutexTrace = CreateMutexW( NULL, FALSE, wcsrchr( g_wszMutexTrace, L'\\' ) + 1 ) ) ) - { - Call( ErrOSErrFromWin32Err( GetLastError() ) ); - } - } HandleError: AssertSz( INVALID_HANDLE_VALUE != g_hFileTrace, "g_hFileTrace should be NULL if it couldn't be opened." ); - AssertSz( ( ( NULL == g_hMutexTrace ) == ( NULL == g_hFileTrace ) ) - || err < JET_errSuccess, - "g_hMutexTrace (%p) and g_hFileTrace (%p) must both be NULL or non-NULL. Or that there was an error.", - g_hMutexTrace, g_hFileTrace ); // Since this is actually the VERY first trace out of the whole system, I'm attributing // it to the higher level SysInitTerm. @@ -1378,11 +1348,6 @@ ERR ErrOSTraceIInit() CloseHandle( g_hFileTrace ); g_hFileTrace = NULL; } - if ( g_hMutexTrace ) - { - CloseHandle( g_hMutexTrace ); - g_hMutexTrace = NULL; - } if ( g_fcsThreadTableInit ) { DeleteCriticalSection( &g_csThreadTable ); From d133f71026c275509dd166db4dd9628bc9d700dc Mon Sep 17 00:00:00 2001 From: Umair Ahmad Date: Fri, 4 Nov 2022 07:01:56 +0000 Subject: [PATCH 083/102] Refactor FCB creation, initialization and open 1/2 1. Separate out FCB creation from FUCB creation. Move FCB creation to the FILE layer, where catalog properties can be accessed. FCB initialization is separated into 2 parts. Required init, and optional init. 2. Provide a mechanism to refcount FCBs without opening FUCBs on them. The FILE layer guarantees that any pfcbs it passes around are properly refcount managed. This is change 1/2. Some CR feedback will come later in change 2/2. 
[Substrate:8e3b5f6f07f2590ae8fd539df59a088d8b66931e] --- dev/ese/src/ese/_log/logredo.cxx | 2 +- dev/ese/src/ese/bt.cxx | 275 +------------------------------ dev/ese/src/ese/cat.cxx | 137 +++++++-------- dev/ese/src/ese/dbshrink.cxx | 19 +-- dev/ese/src/ese/dbtask.cxx | 4 +- dev/ese/src/ese/dbutil.cxx | 66 ++++---- dev/ese/src/ese/dir.cxx | 57 ------- dev/ese/src/ese/esedump.cxx | 12 +- dev/ese/src/ese/fcb.cxx | 109 ++++++------ dev/ese/src/ese/fcreate.cxx | 137 ++++++++------- dev/ese/src/ese/fileopen.cxx | 252 +++++++++++++++++++++++++++- dev/ese/src/ese/lv.cxx | 9 +- dev/ese/src/ese/repair.cxx | 41 +++-- dev/ese/src/ese/space.cxx | 226 ++++++++++++++----------- dev/ese/src/inc/_bt.hxx | 2 - dev/ese/src/inc/bt.hxx | 57 ------- dev/ese/src/inc/cat.hxx | 6 +- dev/ese/src/inc/dir.hxx | 2 - dev/ese/src/inc/fcb.hxx | 44 +++-- dev/ese/src/inc/file.hxx | 65 +++++++- dev/ese/src/inc/fucb.hxx | 1 + dev/ese/src/inc/space.hxx | 2 +- 22 files changed, 739 insertions(+), 786 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index c8f06f4a..fede9c3d 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -1005,7 +1005,7 @@ LOCAL ERR ErrLGRICreateFucb( // get fcb for table, if one exists // - pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* FIncrementRefCount */, fTrue /* fInitForRecovery */); + pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* FIncrementRefCount */ ); Assert( pfcbNil == pfcb || ( fcbsf & fcbsfInitialized ) ); if ( pfcbNil == pfcb ) { diff --git a/dev/ese/src/ese/bt.cxx b/dev/ese/src/ese/bt.cxx index 2abe6172..f78ea6ee 100644 --- a/dev/ese/src/ese/bt.cxx +++ b/dev/ese/src/ese/bt.cxx @@ -499,7 +499,6 @@ ERR ErrBTOpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb, BOOL fAllowReuse ) FUCB *pfucb; Assert( pfcb != pfcbNil ); - Assert( pfcb->FInitialized() ); // In most cases, we should reuse a deferred-closed FUCB. The one // time we don't want to is if we're opening a space cursor. 
@@ -789,24 +788,7 @@ VOID BTClose( FUCB *pfucb ) pfcb->ResetDomainDenyWrite(); } - if ( !pfcb->FInitialized() ) - { - - // we own the FCB (we're closing because the FCB was created during - // a DIROpen() of a DIRCreateDirectory() or because an error - // occurred during FILEOpenTable()) - - // unlink the FUCB from the FCB without moving the FCB to the - // avail LRU list (this prevents the FCB from being purged) - - pfucb->u.pfcb->Unlink( pfucb, fTrue ); - - // synchronously purge the FCB - - pfcb->PrepareForPurge(); - pfcb->Purge(); - } - else if ( pfcb->FTypeTable() ) + if ( pfcb->FTypeTable() ) { // only table FCBs can be moved to the avail-LRU list @@ -7439,6 +7421,7 @@ ERR ErrBTFindFragmentedRange( ERR ErrBTDumpPageUsage( PIB * ppib, const IFMP ifmp, const PGNO pgnoFDP ) { ERR err = JET_errSuccess; + FCBRef fcbRef; FUCB * pfucb = pfucbNil; DIB dib; CSR * pcsr; @@ -7464,8 +7447,8 @@ ERR ErrBTDumpPageUsage( PIB * ppib, const IFMP ifmp, const PGNO pgnoFDP ) if ( pgnoNull != pgnoFDP ) { - - Call( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidNil, fcbRef ) ); + Call( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); FUCBSetIndex( pfucb ); // we will be traversing the entire tree in order, preread all the pages @@ -7626,229 +7609,6 @@ ERR ErrBTDumpPageUsage( PIB * ppib, const IFMP ifmp, const PGNO pgnoFDP ) -// ****************************************************** -// SPECIAL OPERATIONS -// - - -INLINE ERR ErrBTICreateFCB( - PIB *ppib, - const IFMP ifmp, - const PGNO pgnoFDP, - const OBJID objidFDP, - const OPENTYPE opentype, - FUCB **ppfucb ) -{ - ERR err; - FCB *pfcb = pfcbNil; - FUCB *pfucb = pfucbNil; - - // create a new FCB - - CallR( FCB::ErrCreate( ppib, ifmp, pgnoFDP, &pfcb ) ); - - // the creation was successful - - Assert( pfcb->IsLocked() ); - Assert( pfcb->FTypeNull() ); // No fcbtype yet. 
- Assert( pfcb->Ifmp() == ifmp ); - Assert( pfcb->PgnoFDP() == pgnoFDP ); - Assert( !pfcb->FInitialized() ); - Assert( pfcb->WRefCount() == 0 ); - pfcb->Unlock(); - - Call( ErrFUCBOpen( ppib, ifmp, &pfucb ) ); - Call( pfcb->ErrLink( pfucb ) ); - - Assert( !pfcb->FSpaceInitialized() ); - Assert( openNew != opentype || objidNil == objidFDP ); - if ( openNew != opentype ) - { - if ( objidNil == objidFDP ) - { - Assert( openNormal == opentype ); - - // read space info into FCB cache, including objid - Call( ErrSPInitFCB( pfucb ) ); - Assert( g_fRepair || pfcb->FSpaceInitialized() ); - } - else - { - pfcb->SetObjidFDP( objidFDP ); - if ( openNormalNonUnique == opentype ) - { - pfcb->Lock(); - pfcb->SetNonUnique(); - pfcb->Unlock(); - } - else - { - Assert( pfcb->FUnique() ); // btree is initially assumed to be unique - Assert( openNormalUnique == opentype ); - } - Assert( !pfcb->FSpaceInitialized() ); - } - } - - if ( pgnoFDP == pgnoSystemRoot ) - { - // SPECIAL CASE: For database cursor, we've got all the - // information we need. 
- - // when opening db cursor, always force to check the root page - Assert( objidNil == objidFDP ); - if ( openNew == opentype ) - { - // objid will be set when we return to ErrSPCreate() - Assert( objidNil == pfcb->ObjidFDP() ); - } - else - { - Assert( objidSystemRoot == pfcb->ObjidFDP() ); - } - - // insert this FCB into the global list - - pfcb->InsertList(); - - // finish initializing this FCB - - pfcb->Lock(); - Assert( pfcb->FTypeNull() ); - pfcb->SetTypeDatabase(); - pfcb->CreateComplete(); - pfcb->Unlock(); - } - - *ppfucb = pfucb; - Assert( !Pcsr( pfucb )->FLatched() ); - - return err; - -HandleError: - Assert( pfcbNil != pfcb ); - Assert( !pfcb->FInitialized() ); - Assert( !pfcb->FInList() ); - Assert( !pfcb->FInLRU() ); - Assert( ptdbNil == pfcb->Ptdb() ); - Assert( pfcbNil == pfcb->PfcbNextIndex() ); - Assert( pidbNil == pfcb->Pidb() ); - - if ( pfucbNil != pfucb ) - { - if ( pfcbNil != pfucb->u.pfcb ) - { - Assert( pfcb == pfucb->u.pfcb ); - // We managed to link the FUCB to the FCB before we errored. - pfcb->Unlink( pfucb, fTrue ); - } - - // close the FUCB - FUCBClose( pfucb ); - } - - // synchronously purge the FCB - pfcb->PrepareForPurge( fFalse ); - pfcb->Purge( fFalse ); - - return err; -} - - -// ***************************************************** -// BTREE INTERNAL ROUTINES -// - -// opens a cursor on a tree rooted at pgnoFDP -// open cursor on corresponding FCB if it is in cache [common case] -// if FCB not in cache, create one, link with cursor -// and initialize FCB space info -// if fNew is set, this is a new tree, -// so do not initialize FCB space info -// fWillInitFCB: On a passive, is the caller planning to fully hydrate the placeholder FCB? 
-// -ERR ErrBTIOpen( - PIB *ppib, - const IFMP ifmp, - const PGNO pgnoFDP, - const OBJID objidFDP, - const OPENTYPE opentype, - FUCB **ppfucb, - BOOL fWillInitFCB ) -{ - ERR err; - FCB *pfcb; - FCBStateFlags fcbsf; - ULONG cRetries = 0; - PIBTraceContextScope tcScope = ppib->InitTraceContextScope( ); - tcScope->iorReason.SetIors( iorsBTOpen ); - -RetrieveFCB: - AssertTrack( cRetries != 100000, "TooManyFcbOpenRetries" ); - - // get the FCB for the given ifmp/pgnoFDP - - pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue, !fWillInitFCB ); - if ( pfcb == pfcbNil ) - { - - // the FCB does not exist - - Assert( fcbsfNone == fcbsf ); - - // try to create a new B-tree which will cause the creation of the new FCB - - err = ErrBTICreateFCB( ppib, ifmp, pgnoFDP, objidFDP, opentype, ppfucb ); - Assert( err <= JET_errSuccess ); // Shouldn't return warnings. - - if ( err == errFCBExists ) - { - - // we failed because someone else was racing to create - // the same FCB that we want, but they beat us to it - - // try to get the FCB again - - UtilSleep( 10 ); - cRetries++; - goto RetrieveFCB; - } - Call( err ); - - tcScope->nParentObjectClass = TceFromFUCB( *ppfucb ); - } - else - { - tcScope->nParentObjectClass = pfcb->TCE(); - - if ( fcbsf & fcbsfInitialized ) - { - Assert( pfcb->WRefCount() >= 1); - err = ErrBTOpen( ppib, pfcb, ppfucb ); - - // Cursor has been opened on FCB, so refcount should be - // at least 2 (one for cursor, one for call to PfcbFCBGet()). - // (if ErrBTOpen returns w/o error) - Assert( pfcb->WRefCount() > 1 || (1 == pfcb->WRefCount() && err < JET_errSuccess) ); - - pfcb->Release(); - } - else - { - FireWall( "DeprecatedSentinelFcbBtOpen" ); // Sentinel FCBs are believed deprecated - Assert( !FFMPIsTempDB( ifmp ) ); // Sentinels not used by sort/temp. tables. - - // If we encounter a sentinel, it means the - // table has been locked for subsequent deletion. 
- err = ErrERRCheck( JET_errTableLocked ); - } - } - -HandleError: - return err; -} - - // ************************************************* // movement operations // @@ -7905,33 +7665,6 @@ ERR ErrBTIGotoRoot( FUCB *pfucb, LATCH latch ) return JET_errSuccess; } -ERR ErrBTIOpenAndGotoRoot( PIB *ppib, const PGNO pgnoFDP, const IFMP ifmp, FUCB **ppfucb ) -{ - ERR err; - FUCB *pfucb; - - CallR( ErrBTIOpen( ppib, ifmp, pgnoFDP, objidNil, openNormal, &pfucb, fFalse ) ); - Assert( pfucbNil != pfucb ); - Assert( pfcbNil != pfucb->u.pfcb ); - Assert( pfucb->u.pfcb->FInitialized() ); - - err = ErrBTIGotoRoot( pfucb, latchRIW ); - if ( err < JET_errSuccess ) - { - BTClose( pfucb ); - } - else - { - Assert( latchRIW == Pcsr( pfucb )->Latch() ); - Assert( pcsrNil == pfucb->pcsrRoot ); - pfucb->pcsrRoot = Pcsr( pfucb ); - - *ppfucb = pfucb; - } - - return err; -} - // this is the uncommon case in the refresh logic // where we lost physical currency on page // diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index 91748af0..d8ca9491 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -457,17 +457,18 @@ INLINE ERR ErrCATICreateCatalogIndexes( FUCB *pfucbTableExtent; PGNO pgnoIndexFDP; FCB *pfcb = pfcbNil; + FCBRef fcbRef; // don't maintain secondary indexes on the shadow catalog. // Open cursor for space navigation - CallR( ErrDIROpen( ppib, pgnoFDPMSO, ifmp, &pfucbTableExtent ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoFDPMSO, objidFDPMSO, fcbRef ) ); + CallR( ErrDIROpen( ppib, fcbRef.get(), &pfucbTableExtent ) ); pfcb = pfucbTableExtent->u.pfcb; Assert( pfucbTableExtent != pfucbNil ); Assert( !FFUCBVersioned( pfucbTableExtent ) ); // Verify won't be deferred closed. 
- Assert( pfcb != pfcbNil ); Assert( !pfcb->FInitialized() ); Assert( pfcb->Pidb() == pidbNil ); @@ -497,9 +498,9 @@ INLINE ERR ErrCATICreateCatalogIndexes( HandleError: Assert( pfcb->FInitialized() ); - Assert( pfcb->WRefCount() == 1 ); + Assert( pfcb->WRefCount() == 2 ); // 1 for fcbRef, 1 for pfucbTableExtent - // force the FCB to be uninitialized so it will be purged by DIRClose + // force the FCB to be uninitialized so it will be purged by FCBRef deleter pfcb->Lock(); pfcb->CreateCompleteErr( errFCBUnusable ); @@ -2247,6 +2248,7 @@ ERR ErrREPAIRCATCreate( ERR ErrCATCreate( PIB *ppib, const IFMP ifmp, const BOOL fReplayCreateDbImplicitly ) { ERR err; + FCBRef fcbRef; FUCB *pfucb = pfucbNil; PGNO pgnoFDP; PGNO pgnoFDPShadow; @@ -2274,7 +2276,8 @@ ERR ErrCATCreate( PIB *ppib, const IFMP ifmp, const BOOL fReplayCreateDbImplicit // allocate cursor // - CallR( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + CallR( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); Assert( cpgMSOInitial > cpgTableMin ); Call( ErrDIRCreateDirectory( @@ -2318,6 +2321,8 @@ ERR ErrCATCreate( PIB *ppib, const IFMP ifmp, const BOOL fReplayCreateDbImplicit DIRClose( pfucb ); } + fcbRef.reset(); // do we need to release the FCB before trx rollback? 
+ if( err < 0 ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); @@ -5451,12 +5456,10 @@ LOCAL VOID CATIFreeSecondaryIndexes( FCB *pfcbSecondaryIndexes ) /* get index info of a system table index /**/ -ERR ErrCATInitCatalogFCB( FUCB *pfucbTable ) +ERR ErrCATInitCatalogFCB( PIB* ppib, FCB* pfcb ) { ERR err; - PIB *ppib = pfucbTable->ppib; - const IFMP ifmp = pfucbTable->ifmp; - FCB *pfcb = pfucbTable->u.pfcb; + const IFMP ifmp = pfcb->Ifmp(); TDB *ptdb = ptdbNil; IDB idb( PinstFromIfmp( ifmp ) ); UINT iIndex; @@ -5532,7 +5535,7 @@ ERR ErrCATInitCatalogFCB( FUCB *pfucbTable ) } else if ( !fShadow ) { - FUCB *pfucbSecondaryIndex; + FCBRef fcbRefSecondaryIndex; PGNO pgnoIndexFDP; OBJID objidIndexFDP; @@ -5558,46 +5561,33 @@ ERR ErrCATInitCatalogFCB( FUCB *pfucbTable ) } Assert( idb.FUnique() ); // all catalog indexes are unique - note redundant w/ above assert. - Call( ErrDIROpenNoTouch( - ppib, - ifmp, - pgnoIndexFDP, - objidIndexFDP, - fTrue, // all catalog indexes are unique - &pfucbSecondaryIndex, - fTrue ) ); // will initialize FCB - Assert( !pfucbSecondaryIndex->u.pfcb->FInitialized() || pfucbSecondaryIndex->u.pfcb->FInitedForRecovery() ); + Call( ErrFILEFcbGetNoTouch( ppib, ifmp, pgnoIndexFDP, objidIndexFDP, fcbRefSecondaryIndex ) ); - err = ErrFILEIInitializeFCB( + Assert( !fcbRefSecondaryIndex->FInitialized() || fcbRefSecondaryIndex->FInitedForRecovery() ); + Call( ErrFILEIInitializeFCB( ppib, ifmp, ptdb, - pfucbSecondaryIndex->u.pfcb, + fcbRefSecondaryIndex.get(), &idb, fFalse, pgnoIndexFDP, PSystemSpaceHints(eJSPHDefaultUserTable), - NULL ); - if ( err < 0 ) - { - DIRClose( pfucbSecondaryIndex ); - goto HandleError; - } - - pfucbSecondaryIndex->u.pfcb->SetPfcbNextIndex( pfcbSecondaryIndexes ); - pfcbSecondaryIndexes = pfucbSecondaryIndex->u.pfcb; + NULL ) ); - Assert( !pfucbSecondaryIndex->u.pfcb->FInList() ); + fcbRefSecondaryIndex->SetPfcbNextIndex( pfcbSecondaryIndexes ); + pfcbSecondaryIndexes = fcbRefSecondaryIndex.get(); - // mark the 
secondary index as being initialized successfully + Assert( !fcbRefSecondaryIndex->FInList() ); - pfucbSecondaryIndex->u.pfcb->Lock(); - pfucbSecondaryIndex->u.pfcb->CreateComplete(); - pfucbSecondaryIndex->u.pfcb->ResetInitedForRecovery(); - pfucbSecondaryIndex->u.pfcb->Unlock(); + // mark the secondary index fcb as being initialized successfully + // this protects it from being purged when fcbRef goes out of scope - DIRClose( pfucbSecondaryIndex ); + fcbRefSecondaryIndex->Lock(); + fcbRefSecondaryIndex->CreateComplete(); + fcbRefSecondaryIndex->ResetInitedForRecovery(); + fcbRefSecondaryIndex->Unlock(); } } @@ -7731,7 +7721,7 @@ LOCAL ERR ErrCATIInitIndexFCBs( } else { - FUCB *pfucbSecondaryIndex; + FCBRef fcbRefSecondaryIndex; Assert( pgnoIndexFDP != pfcb->PgnoFDP() || g_fRepair ); @@ -7743,48 +7733,36 @@ LOCAL ERR ErrCATIInitIndexFCBs( *pfSecondaryPgnoFDPLastSetRequired = true; } - Call( ErrDIROpenNoTouch( - ppib, - ifmp, - pgnoIndexFDP, - objidIndexFDP, - idb.FUnique(), - &pfucbSecondaryIndex, - fTrue ) ); // Will initialize FCB - Assert( !pfucbSecondaryIndex->u.pfcb->FInitialized() || pfucbSecondaryIndex->u.pfcb->FInitedForRecovery() ); - - err = ErrFILEIInitializeFCB( + Call( ErrFILEFcbGetNoTouch( ppib, ifmp, pgnoIndexFDP, objidIndexFDP, fcbRefSecondaryIndex ) ); + + Assert( !fcbRefSecondaryIndex->FInitialized() || fcbRefSecondaryIndex->FInitedForRecovery() ); + Call( ErrFILEIInitializeFCB( ppib, ifmp, ptdb, - pfucbSecondaryIndex->u.pfcb, + fcbRefSecondaryIndex.get(), &idb, fFalse, pgnoIndexFDP, &jsph, - pfcbTemplate ); - if ( err < 0 ) - { - DIRClose( pfucbSecondaryIndex ); - goto HandleError; - } - Assert( pfucbSecondaryIndex->u.pfcb->ObjidFDP() == objidIndexFDP ); + pfcbTemplate ) ); - pfucbSecondaryIndex->u.pfcb->SetFileTimePgnoFDPLastSet( ftPgnoFDPLastSet ); - pfucbSecondaryIndex->u.pfcb->SetPfcbNextIndex( pfcbSecondaryIndexes ); - pfcbSecondaryIndexes = pfucbSecondaryIndex->u.pfcb; + Assert( fcbRefSecondaryIndex->ObjidFDP() == objidIndexFDP ); - 
Assert( !pfucbSecondaryIndex->u.pfcb->FInList() ); + fcbRefSecondaryIndex->SetFileTimePgnoFDPLastSet( ftPgnoFDPLastSet ); + fcbRefSecondaryIndex->SetPfcbNextIndex( pfcbSecondaryIndexes ); + pfcbSecondaryIndexes = fcbRefSecondaryIndex.get(); - // mark the secondary index as being initialized successfully + Assert( !fcbRefSecondaryIndex->FInList() ); - pfucbSecondaryIndex->u.pfcb->Lock(); - pfucbSecondaryIndex->u.pfcb->SetInitialIndex(); - pfucbSecondaryIndex->u.pfcb->CreateComplete(); - pfucbSecondaryIndex->u.pfcb->ResetInitedForRecovery(); - pfucbSecondaryIndex->u.pfcb->Unlock(); + // mark the secondary index fcb as being initialized successfully + // this protects it from being purged when fcbRef goes out of scope - DIRClose( pfucbSecondaryIndex ); + fcbRefSecondaryIndex->Lock(); + fcbRefSecondaryIndex->SetInitialIndex(); + fcbRefSecondaryIndex->CreateComplete(); + fcbRefSecondaryIndex->ResetInitedForRecovery(); + fcbRefSecondaryIndex->Unlock(); } Assert( locOnCurBM == pfucbCatalog->locLogical ); @@ -8199,14 +8177,12 @@ LOCAL ERR ErrCATIInitCallbacks( return err; } -ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ) +ERR ErrCATInitFCB( PIB* ppib, FCB* pfcb, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ) { ERR err; - PIB *ppib = pfucbTable->ppib; INST *pinst = PinstFromPpib( ppib ); - const IFMP ifmp = pfucbTable->ifmp; + const IFMP ifmp = pfcb->Ifmp(); FUCB *pfucbCatalog = pfucbNil; - FCB *pfcb = pfucbTable->u.pfcb; TDB *ptdb = ptdbNil; FCB *pfcbTemplateTable = pfcbNil; DATA dataField; @@ -8232,8 +8208,8 @@ ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLa } Assert( !pfcb->FInitialized() || pfcb->FInitedForRecovery() ); - Assert( objidTable == pfucbTable->u.pfcb->ObjidFDP() - || objidNil == pfucbTable->u.pfcb->ObjidFDP() && g_fRepair ); + Assert( objidTable == pfcb->ObjidFDP() + || objidNil ==pfcb->ObjidFDP() && g_fRepair ); CallR( ErrCATOpen( ppib, ifmp, &pfucbCatalog ) ); Assert( 
pfucbNil != pfucbCatalog ); @@ -8650,11 +8626,9 @@ ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLa -ERR ErrCATInitTempFCB( FUCB *pfucbTable ) +ERR ErrCATInitTempFCB( PIB* ppib, FCB* pfcb ) { ERR err; - PIB *ppib = pfucbTable->ppib; - FCB *pfcb = pfucbTable->u.pfcb; TDB *ptdb = ptdbNil; TCIB tcib; INST *pinst = PinstFromPpib( ppib ); @@ -8665,7 +8639,7 @@ ERR ErrCATInitTempFCB( FUCB *pfucbTable ) /* created, in which case there are no primary or secondary indexes yet. /**/ - CallR( ErrTDBCreate( pinst, pfucbTable->ifmp, &ptdb, &tcib ) ); + CallR( ErrTDBCreate( pinst, pfcb->Ifmp(), &ptdb, &tcib ) ); ptdb->SetLVChunkMost( (LONG)UlParam( JET_paramLVChunkSizeMost ) ); @@ -15766,6 +15740,7 @@ ERR ErrCATGetCursorsFromObjid( ERR err = JET_errSuccess; PGNO pgnoFDPParent = pgnoNull; PGNO pgnoFDP = pgnoNull; + FCBRef fcbRef; FUCB* pfucb = pfucbNil; FUCB* pfucbParent = pfucbNil; @@ -15775,7 +15750,8 @@ ERR ErrCATGetCursorsFromObjid( Assert( objidParent == objidNil ); Assert( sysobj == sysobjNil ); pgnoFDP = pgnoSystemRoot; - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); pgnoFDPParent = pgnoNull; pfucbParent = pfucbNil; } @@ -15803,7 +15779,8 @@ ERR ErrCATGetCursorsFromObjid( if ( sysobj == sysobjTable ) { pgnoFDPParent = pgnoSystemRoot; - Call( ErrDIROpen( ppib, pgnoFDPParent, ifmp, &pfucbParent ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPParent, objidSystemRoot, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucbParent ) ); pgnoFDP = pgnoFDPTable; pfucb = pfucbTable; diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index e6741811..3e873ccd 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -1976,10 +1976,12 @@ LOCAL ERR ErrSHKIRootMoveCheck( const ROOTMOVE& rm, FUCB* const pfucb, const OBJ } // Get child's pgnoFDP. 
+ FCBRef fcbRefChild; PGNO pgnoFDPChild = pgnoNull; Call( ErrCATSeekObjectByObjid( ppib, ifmp, objidTable, sysobjChild, objidChild, NULL, 0, &pgnoFDPChild ) ); - Call( ErrBTIOpen( ppib, ifmp, pgnoFDPChild, objidNil, openNormal, &pfucbChild, fFalse ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPChild, objidChild, fcbRefChild ) ); + Call( ErrBTOpen( ppib, fcbRefChild.get(), &pfucbChild ) ); Call( ErrBTIGotoRoot( pfucbChild, latchRIW ) ); pfucbChild->pcsrRoot = Pcsr( pfucbChild ); @@ -2152,20 +2154,17 @@ ERR ErrSHKRootPageMove( // initialize helper variables. // - // Retrieve some metadata first by opening it at the BT level. - Call( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); - pfcb = pfucb->u.pfcb; + // Retrieve some metadata first by opening an FCB. + { + FCBRef fcbRef; + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidNil, fcbRef ) ); // Determine whether or not this is a root object. - objid = pfcb->ObjidFDP(); + objid = fcbRef->ObjidFDP(); Call( ErrCATGetObjidMetadata( ppib, ifmp, objid, &objidTable, &sysobj ) ); fRootObject = ( sysobj == sysobjTable ); Assert( !!fRootObject == ( objid == objidTable ) ); - - // Close primitive cursor. 
- BTClose( pfucb ); - pfucb = pfucbNil; - pfcb = pfcbNil; + } if ( objid == pfmp->ObjidExtentPageCountCacheFDP() ) { diff --git a/dev/ese/src/ese/dbtask.cxx b/dev/ese/src/ese/dbtask.cxx index 8a505eb2..8d7c4ea7 100644 --- a/dev/ese/src/ese/dbtask.cxx +++ b/dev/ese/src/ese/dbtask.cxx @@ -1099,8 +1099,10 @@ VOID BATCHRECTASK::PrereadTaskBookmarks( PIB * const ppib, const INT itaskStart, BOOKMARK * rgbm; if( NULL != ( rgbm = new BOOKMARK[cbookmarksPreread] ) ) { + FCBRef fcbRef; FUCB * pfucb = pfucbNil; - if( JET_errSuccess == ErrDIROpen( ppib, m_pgnoFDP, m_ifmp, &pfucb ) ) + if ( JET_errSuccess == ErrFILEFcbGet( ppib, m_ifmp, m_pgnoFDP, objidNil, fcbRef ) && + JET_errSuccess == ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ) { PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortRecTask ); diff --git a/dev/ese/src/ese/dbutil.cxx b/dev/ese/src/ese/dbutil.cxx index dc1ac32b..ff195e60 100644 --- a/dev/ese/src/ese/dbutil.cxx +++ b/dev/ese/src/ese/dbutil.cxx @@ -3026,6 +3026,7 @@ LOCAL ERR ErrEnumDataNodes( ) { ERR err; + FCBRef fcbRef; FUCB *pfucb = pfucbNil; BOOL fForceInit = fFalse; DIB dib; @@ -3033,31 +3034,32 @@ LOCAL ERR ErrEnumDataNodes( PGNO pgnoLastSeen = pgnoNull; CPG cpgSeen = 0; - CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidNil, fcbRef ) ); + CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); - Assert( pfcbNil != pfucb->u.pfcb ); + Assert( pfcbNil != fcbRef.get() ); // This is shamelessly stolen from the space enumeration/printing code // that also walks a B-Tree during eseutil /ms, but its not clear if how // much of what we were doing here is goodness ... I read the lifecycle // of a FCB/FUCB doc, but it was still not clear. Haha, just kidding, I // would've read such a doc if it existed. 
- if ( !pfucb->u.pfcb->FInitialized() ) + if ( !fcbRef->FInitialized() ) { Assert( pgnoSystemRoot != pgnoFDP ); Assert( pgnoFDPMSO != pgnoFDP ); Assert( pgnoFDPMSO_NameIndex != pgnoFDP ); Assert( pgnoFDPMSO_RootObjectIndex != pgnoFDP ); - Assert( pfucb->u.pfcb->WRefCount() == 1 ); + Assert( fcbRef->WRefCount() == 2 ); - pfucb->u.pfcb->Lock(); + fcbRef->Lock(); // must force FCB to initialized state to allow SPGetInfo() to // open more cursors on the FCB -- this is safe because no // other thread should be opening this FCB - pfucb->u.pfcb->CreateComplete(); + fcbRef->CreateComplete(); - pfucb->u.pfcb->Unlock(); + fcbRef->Unlock(); fForceInit = fTrue; } @@ -3071,8 +3073,8 @@ LOCAL ERR ErrEnumDataNodes( BTUp( pfucb ); - if ( pfucb->u.pfcb->FPrimaryIndex() || - pfucb->u.pfcb->FTypeLV() || + if ( fcbRef->FPrimaryIndex() || + fcbRef->FTypeLV() || FFUCBSpace( pfucb ) ) { // we will be traversing the entire tree in order, preread all the pages @@ -3131,15 +3133,15 @@ LOCAL ERR ErrEnumDataNodes( if ( fForceInit ) { - Assert( pfucb->u.pfcb->WRefCount() == 1 ); + Assert( fcbRef->WRefCount() == 2 ); - pfucb->u.pfcb->Lock(); + fcbRef->Lock(); // force the FCB to be uninitialized so it will be purged by BTClose - pfucb->u.pfcb->CreateCompleteErr( errFCBUnusable ); + fcbRef->CreateCompleteErr( errFCBUnusable ); - pfucb->u.pfcb->Unlock(); + fcbRef->Unlock(); } BTClose( pfucb ); @@ -3231,11 +3233,13 @@ LOCAL ERR ErrDBUTLGetSpaceTreeInfo( CPRINTF * const pcprintf ) { ERR err; + FCBRef fcbRef; FUCB *pfucb = pfucbNil; BOOL fForceInit = fFalse; CPG rgcpgExtent[4]; - CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidFDP, fcbRef ) ); + CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); Assert( pfcbNil != pfucb->u.pfcb ); @@ -3247,7 +3251,7 @@ LOCAL ERR ErrDBUTLGetSpaceTreeInfo( Assert( pgnoFDPMSO != pgnoFDP ); Assert( pgnoFDPMSO_NameIndex != pgnoFDP ); Assert( pgnoFDPMSO_RootObjectIndex != pgnoFDP ); - Assert( 
pfucb->u.pfcb->WRefCount() == 1 ); + Assert( pfucb->u.pfcb->WRefCount() == 2 ); // +1 for fcbRef, +1 for pfucb pfucb->u.pfcb->Lock(); @@ -3405,15 +3409,15 @@ LOCAL ERR ErrDBUTLGetSpaceTreeInfo( if ( fForceInit ) { - Assert( pfucb->u.pfcb->WRefCount() == 1 ); + Assert( fcbRef->WRefCount() == 2 ); - pfucb->u.pfcb->Lock(); + fcbRef->Lock(); - // force the FCB to be uninitialized so it will be purged by BTClose + // force the FCB to be uninitialized so it will be purged by FCBRef deleter - pfucb->u.pfcb->CreateCompleteErr( errFCBUnusable ); + fcbRef->CreateCompleteErr( errFCBUnusable ); - pfucb->u.pfcb->Unlock(); + fcbRef->Unlock(); } BTClose( pfucb ); @@ -5262,6 +5266,7 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) FUCB* pfucbSpaceTree = pfucbNil; FUCB* pfucbTable = pfucbNil; FUCB* pfucb = pfucbNil; + FCBRef fcbRef; szContext = "CheckInTrx"; if ( ppib->Level() > 0 ) @@ -5343,16 +5348,10 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) { // We are probably racing with table deletion. FCBStateFlags fcbsf = fcbsfNone; - OBJID objidFcb = objidNil; - const BOOL fFoundFcb = ( FCB::PfcbFCBGet( - ifmp, - pgnoFDPLast, - &fcbsf, - fFalse, // fIncrementRefCount - fTrue, // fInitForRecovery - &objidFcb ) != pfcbNil ) && - ( objidFcb == objidLast ); - const BOOL fDeletePending = fFoundFcb && ( ( fcbsf & fcbsfDeletePending ) != 0 ); + FCBRef fcbRefLast( FCB::PfcbFCBGet( ifmp, pgnoFDPLast, &fcbsf, fTrue /* fIncrementRefCount */ ) ); + const BOOL fFoundFcb = ( fcbRefLast.get() != pfcbNil && fcbRefLast->ObjidFDP() == objidLast ); + const BOOL fDeletePending = fFoundFcb && ( fcbsf & fcbsfDeletePending ); + fcbRefLast.reset(); if ( fFoundFcb ) { @@ -5476,10 +5475,13 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) szContext = "RootSpace"; // Open root. 
- Call( ErrBTIOpen( ppib, ifmp, pgnoSystemRoot, objidNil, openNormal, &pfucb, fFalse ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + Assert( fcbRef->FInitialized() ); + Assert( fcbRef->FSpaceInitialized() ); + + Call( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); Call( ErrBTIGotoRoot( pfucb, latchReadNoTouch ) ); pfucb->pcsrRoot = Pcsr( pfucb ); - Assert( pfucb->u.pfcb->FSpaceInitialized() ); // Root object. CPG rgcpgRootInfo[ 4 ] = { cpgNil }; diff --git a/dev/ese/src/ese/dir.cxx b/dev/ese/src/ese/dir.cxx index a98b12a4..3a590ae8 100644 --- a/dev/ese/src/ese/dir.cxx +++ b/dev/ese/src/ese/dir.cxx @@ -207,63 +207,6 @@ ERR ErrDIRCreateDirectory( // Open/Close routines // -// opens a cursor on given ifmp, pgnoFDP -// -ERR ErrDIROpen( PIB *ppib, PGNO pgnoFDP, IFMP ifmp, FUCB **ppfucb, BOOL fWillInitFCB ) -{ - ERR err; - FUCB *pfucb; - - CheckPIB( ppib ); - -#ifdef DEBUG - INST *pinst = PinstFromPpib( ppib ); - if ( !pinst->FRecovering() - && pinst->m_fSTInit == fSTInitDone - && !Ptls()->FIsTaskThread() - && !Ptls()->fIsRCECleanup ) - { - CheckDBID( ppib, ifmp ); - } -#endif - - CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb, openNormal, fWillInitFCB ) ); - DIRInitOpenedCursor( pfucb, pfucb->ppib->Level() ); - - // set return pfucb - // - *ppfucb = pfucb; - return JET_errSuccess; -} - -// open cursor, don't touch root page -ERR ErrDIROpenNoTouch( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, BOOL fUnique, FUCB **ppfucb, BOOL fWillInitFCB ) -{ - ERR err; - FUCB *pfucb; - - CheckPIB( ppib ); - -#ifdef DEBUG - INST *pinst = PinstFromPpib( ppib ); - if ( !pinst->FRecovering() - && pinst->m_fSTInit == fSTInitDone - && !Ptls()->FIsTaskThread() - && !Ptls()->fIsRCECleanup ) - { - CheckDBID( ppib, ifmp ); - } -#endif - - CallR( ErrBTOpenNoTouch( ppib, ifmp, pgnoFDP, objidFDP, fUnique, &pfucb, fWillInitFCB ) ); - DIRInitOpenedCursor( pfucb, pfucb->ppib->Level() ); - - // set return pfucb - // - *ppfucb = pfucb; - return JET_errSuccess; -} 
- // open cursor on given FCB // ERR ErrDIROpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb ) diff --git a/dev/ese/src/ese/esedump.cxx b/dev/ese/src/ese/esedump.cxx index 4ed03afa..4e1cd325 100644 --- a/dev/ese/src/ese/esedump.cxx +++ b/dev/ese/src/ese/esedump.cxx @@ -361,8 +361,7 @@ LOCAL ERR ErrESEDUMPIndexForOneTable(FUCB *pfucbTable, JET_GRBIT grbitESEDUMPMod // open it Call (ErrDIROpen( pfucbTable->ppib, - pfcb->PgnoFDP(), - pfucbTable->ifmp, + pfcb, &pfucbIndex )); Assert(pfucbIndex != pfucbNil ); @@ -727,15 +726,14 @@ LOCAL ERR ErrESEDUMPCheckAndDumpSpaceInfo(FUCB *pfucb, JET_GRBIT grbitESEDUMPMod // then call the space info function for this page LOCAL ERR ErrESEDUMPDatabaseInfo(PIB *ppib, IFMP ifmp, JET_GRBIT grbitESEDUMPMode) { + FCBRef fcbRef; FUCB *pfucbDb = pfucbNil; ERR err = JET_errSuccess; // open the table - CallR (ErrDIROpen( - ppib, - pgnoSystemRoot, - ifmp, - &pfucbDb )); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + CallR( ErrDIROpen( ppib, fcbRef.get(), &pfucbDb ) ); + Assert( pfucbNil != pfucbDb ); Assert( pfcbNil != pfucbDb->u.pfcb ); Assert( pgnoSystemRoot == pfucbDb->u.pfcb->PgnoFDP()); diff --git a/dev/ese/src/ese/fcb.cxx b/dev/ese/src/ese/fcb.cxx index cd866ad5..91a10247 100644 --- a/dev/ese/src/ese/fcb.cxx +++ b/dev/ese/src/ese/fcb.cxx @@ -734,12 +734,11 @@ VOID FCB::UnlinkIDB( FCB *pfcbTable ) // NOTE: this is the proper channel for accessing an FCB; it uses the locking // protocol setup by the FCB hash-table and FCB latch -FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf, const BOOL fIncrementRefCount, const BOOL fInitForRecovery, OBJID* const pobjid ) +FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf, const BOOL fIncrementRefCount ) { FCBStateFlags fcbsf = fcbsfNone; INST *pinst = PinstFromIfmp( ifmp ); FCB *pfcbT; - OBJID objid = objidNil; FCBHash::ERR errFCBHash; FCBHash::CLock lockFCBHash; FCBHashKey keyFCBHash( ifmp, pgnoFDP 
); @@ -799,6 +798,10 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const // using the existing abstraction. But I don't think it's as safe, since // I think that when we try to acquire the exclusive latch, we register // ourselves as next for the latch. + // SOMEONE: This can be simplified by not requiring a write latch on the fcb for refcount modification. + // If the refcount is interlocked incremented while holding the fcbhash latch, + // it will give the same lifetime guarantees for the fcb as currently implemented. + // Note that it can be interlocked decremented any time without holding the fcbhash latch (just like today). if ( pfcbT->FNeedLock_() ) { CSXWLatch::ERR errSXWLatch = pfcbT->m_sxwl.ErrAcquireExclusiveLatch(); @@ -882,41 +885,6 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const Assert( fcbsf == fcbsfNone ); fcbsf |= fcbsfInitialized; fcbsf |= ( pfcbT->FDeletePending() ? fcbsfDeletePending : fcbsfNone ); - objid = pfcbT->ObjidFDP(); - - // If this is the dummy FCB created by recovery, we need to fully populate - // it, make sure that the others wait while the first person finishes doing it - - if ( pfcbT != pfcbNil && !fInitForRecovery && pfcbT->FInitedForRecovery() ) - { - if ( !pfcbT->FDoingAdditionalInitializationDuringRecovery() ) - { - Assert( pfcbT->IsLocked_( LOCK_TYPE::ltWrite ) ); - pfcbT->SetDoingAdditionalInitializationDuringRecovery(); - } - else - { - // release write latch - pfcbT->Unlock_( LOCK_TYPE::ltWrite ); - - // FCB is not finished initializing - // update performance counter - - PERFOpt( cFCBCacheStalls.Inc( pinst ) ); - - // wait - - UtilSleep( 10 ); - - // try to get the FCB again - - fcbsf = fcbsfNone; - objid = objidNil; - - cRetries++; - goto RetrieveFCB; - } - } if ( pfcbT != pfcbNil ) { @@ -931,16 +899,11 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const SetStateAndReturn: // set the state Assert( ( pfcbT == pfcbNil ) == ( 
fcbsf == fcbsfNone ) ); // Pointer and flag must agree. - Assert( ( pfcbT == pfcbNil ) == ( objid == objidNil ) ); // Pointer and OBJID must agree. Assert( ( fcbsf == fcbsfNone ) || ( fcbsf & fcbsfInitialized ) ); // Can't have any flags set if it's not initialized. if ( pfcbsf ) { *pfcbsf = fcbsf; } - if ( pobjid ) - { - *pobjid = objid; - } // return the FCB Assert( ( pfcbNil == pfcbT ) || ( pfcbT->IsUnlocked_( LOCK_TYPE::ltShared ) && pfcbT->IsUnlocked_( LOCK_TYPE::ltWrite ) ) ); @@ -948,6 +911,48 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const } +// ========================================================================= +// FCB Init during recovery support. + +// Acquires mskFCBDoingAdditionalInitializationDuringRecovery locklessly. +// Returns fTrue for the first thread through, fFalse otherwise. +VOID FCB::AcquireAdditionalInitDuringRecovery() +{ + INST* pinst = PinstFromIfmp( Ifmp() ); + + for ( INT cRetries = 0; true; cRetries++ ) + { + // This could've been done locklessly but it is pointless. + // If another thread acquires this flag, then it is initializing the fcb under the write lock (a potentially heavy operation). + // We don't gain anything by spinning the cpu during that time. Better to wait for the write latch. 
+ + Lock_( LOCK_TYPE::ltWrite ); + if ( !( m_ulFCBFlags & mskFCBDoingAdditionalInitializationDuringRecovery ) ) + { + m_ulFCBFlags |= mskFCBDoingAdditionalInitializationDuringRecovery; + Unlock_( LOCK_TYPE::ltWrite ); + return; + } + + Unlock_( LOCK_TYPE::ltWrite ); + + // Someone else is initializing the fcb + // update performance counter + PERFOpt( cFCBCacheStalls.Inc( pinst ) ); + AssertTrack( cRetries != 100000, "TooManyAdditionalInitDuringRecoveryRetries" ); + + // wait + UtilSleep( 10 ); + } +} + +VOID FCB::ReleaseAdditionalInitDuringRecovery() +{ + Assert( IsLocked_( LOCK_TYPE::ltWrite ) ); + m_ulFCBFlags &= ( ~mskFCBDoingAdditionalInitializationDuringRecovery ); +} + + // create a new FCB // // this function allocates an FCB and possibly recycles unused FCBs for later @@ -1173,7 +1178,7 @@ BOOL FCB::FScanAndPurge_( PERFOpt( cFCBAsyncScan.Inc( pinst, tce ) ); - if ( pfcbToPurge->FCheckFreeAndPurge_( ppib, fThreshold ) ) + if ( pfcbToPurge->FCheckFreeAndPurge_( fThreshold ) ) { // pfcbPurge is now gone. @@ -1400,10 +1405,9 @@ enum FCBPurgeFailReason : BYTE // fcbpfr // etc... 
(everything that makes it free), we can purge the FCB BOOL FCB::FCheckFreeAndPurge_( - _In_ PIB *ppib, _In_ const BOOL fThreshold ) { - INST *pinst = PinstFromPpib( ppib ); + INST *pinst = PinstFromIfmp( Ifmp() ); Assert( pinst->m_critFCBList.FOwner() ); Assert( IsUnlocked_( LOCK_TYPE::ltShared ) ); @@ -1449,11 +1453,6 @@ BOOL FCB::FCheckFreeAndPurge_( fFCBPossiblyFree = fFalse; fcbpfr = fcbpfrDeletePending; } - else if ( FDomainDenyRead( ppib ) ) - { - fFCBPossiblyFree = fFalse; - fcbpfr = fcbpfrDomainDenyRead; - } else if ( FOutstandingVersions_() ) { fFCBPossiblyFree = fFalse; @@ -1471,6 +1470,8 @@ BOOL FCB::FCheckFreeAndPurge_( } else { + EnforceSz( m_crefDomainDenyRead == 0, "FCBPurge_BadDenyReadRef" ); + EnforceSz( m_crefDomainDenyWrite == 0, "FCBPurge_BadDenyWriteRef" ); fFCBPossiblyFree = fTrue; } @@ -3168,9 +3169,9 @@ VOID FCBAssertAllClean( INST *pinst ) // under the assumption that the FCB you are refcounting will // not suddenly disappear (e.g. you own a cursor on it or know // for a fact that someone else does and they will not close it) -VOID FCB::IncrementRefCount() +VOID FCB::IncrementRefCount( BOOL fOwnWriteLock /* = fFalse */ ) { - IncrementRefCount_( fFalse ); + IncrementRefCount_( fOwnWriteLock ); } VOID FCB::IncrementRefCount_( BOOL fOwnWriteLock ) @@ -3285,13 +3286,13 @@ VOID FCB::DecrementRefCountAndUnlink_( FUCB *pfucb, const BOOL fLockList, const Unlock_( LOCK_TYPE::ltWrite ); - if ( fTryPurge && ( pfucbNil != pfucb ) && FTryPurgeOnClose() ) + if ( fTryPurge && FTryPurgeOnClose() ) { // We unlinked an FUCB from a table, and it was the last thing with // a refcount on the table. Try to purge the FCB. If we succeed, // it has to be the last reference to "this", as it may have // been purged. 
- BOOL fPurgeable = FCheckFreeAndPurge_( pfucb->ppib, fFalse ); + BOOL fPurgeable = FCheckFreeAndPurge_( fFalse ); if ( fPurgeable ) { diff --git a/dev/ese/src/ese/fcreate.cxx b/dev/ese/src/ese/fcreate.cxx index fada7fb9..d2090bdc 100644 --- a/dev/ese/src/ese/fcreate.cxx +++ b/dev/ese/src/ese/fcreate.cxx @@ -1856,6 +1856,7 @@ LOCAL ERR ErrFILEICreateIndexes( FCB * pfcbTemplateTable ) { ERR err = JET_errSuccess; + FCBRef fcbRefTableExtent; FUCB *pfucbTableExtent = pfucbNil; FUCB *pfucbCatalog = pfucbNil; CHAR szIndexName[ JET_cbNameMost+1 ]; @@ -1894,20 +1895,22 @@ LOCAL ERR ErrFILEICreateIndexes( } // Open cursor for space navigation - CallR( ErrDIROpen( ppib, pgnoTableFDP, ifmp, &pfucbTableExtent ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoTableFDP, objidTable, fcbRefTableExtent ) ); + Assert( !fcbRefTableExtent->FInitialized() ); + + // force the FCB to be initialized successfully + + fcbRefTableExtent->Lock(); + fcbRefTableExtent->SetTypeTable(); + fcbRefTableExtent->CreateComplete(); + fcbRefTableExtent->Unlock(); + + CallR( ErrDIROpen( ppib, fcbRefTableExtent.get(), &pfucbTableExtent ) ); Assert( pfucbNil != pfucbTableExtent ); Assert( !FFUCBVersioned( pfucbTableExtent ) ); // Verify won't be deferred closed. 
Assert( pfcbNil != pfucbTableExtent->u.pfcb ); - Assert( !pfucbTableExtent->u.pfcb->FInitialized() ); Assert( pfucbTableExtent->u.pfcb->Pidb() == pidbNil ); - // force the FCB to be initialized successfully - - pfucbTableExtent->u.pfcb->Lock(); - pfucbTableExtent->u.pfcb->SetTypeTable(); - pfucbTableExtent->u.pfcb->CreateComplete(); - pfucbTableExtent->u.pfcb->Unlock(); - Call( ErrCATOpen( ppib, ifmp, &pfucbCatalog ) ); Assert( pfucbNil != pfucbCatalog ); @@ -2246,9 +2249,9 @@ LOCAL ERR ErrFILEICreateIndexes( } Assert( pfucbTableExtent != pfucbNil ); - Assert( pfucbTableExtent->u.pfcb->WRefCount() == 1 ); + Assert( pfucbTableExtent->u.pfcb->WRefCount() == 2 ); // +1 for fcbRef, +1 for open fucb - // force the FCB to be uninitialized so it will be purged by DIRClose + // force the FCB to be uninitialized so it will be purged by the FCBRef .dtor pfucbTableExtent->u.pfcb->Lock(); pfucbTableExtent->u.pfcb->CreateCompleteErr( errFCBUnusable ); @@ -2283,6 +2286,7 @@ LOCAL ERR ErrFILEIInheritIndexes( FCB * pfcbTemplateTable ) { ERR err = JET_errSuccess; + FCBRef fcbRefTableExtent; FUCB *pfucbTableExtent = pfucbNil; FUCB *pfucbCatalog = pfucbNil; TDB *ptdbTemplateTable; @@ -2305,7 +2309,8 @@ LOCAL ERR ErrFILEIInheritIndexes( Assert( pfcbTemplateTable->FTemplateTable() ); // Open cursor for space navigation - CallR( ErrDIROpen( ppib, pgnoTableFDP, ifmp, &pfucbTableExtent ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoTableFDP, objidTable, fcbRefTableExtent ) ); + CallR( ErrDIROpen( ppib, fcbRefTableExtent.get(), &pfucbTableExtent ) ); Assert( pfucbNil != pfucbTableExtent ); Assert( !FFUCBVersioned( pfucbTableExtent ) ); // Verify won't be deferred closed. 
Assert( pfcbNil != pfucbTableExtent->u.pfcb ); @@ -2418,9 +2423,9 @@ LOCAL ERR ErrFILEIInheritIndexes( } Assert( pfucbTableExtent != pfucbNil ); - Assert( pfucbTableExtent->u.pfcb->WRefCount() == 1 ); + Assert( pfucbTableExtent->u.pfcb->WRefCount() == 2 ); // +1 for fcbRef, +1 for pfucbTableExtent - // force the FCB to be uninitialized so it will be purged by DIRClose + // force the FCB to be uninitialized so it will be purged by fcbRefTableExtent .dtor pfucbTableExtent->u.pfcb->Lock(); pfucbTableExtent->u.pfcb->CreateCompleteErr( errFCBUnusable ); @@ -2776,7 +2781,10 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, // allocate cursor // - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); + { + FCBRef fcbRef; + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); Call( ErrDIRCreateDirectory( pfucb, CpgInitial( &jsphPrimaryAlloc, g_rgfmp[ ifmp ].CbPage() ), @@ -2786,6 +2794,7 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, fSPFlags | ( FFMPIsTempDB( ifmp ) ? fSPUnversionedExtent : 0 ) ) ); // For temp. 
tables, create unversioned extents DIRClose( pfucb ); pfucb = pfucbNil; + } Assert( ptablecreate->cCreated == 0 ); ptablecreate->cCreated = 1; @@ -6568,7 +6577,7 @@ LOCAL ERR ErrFILEIUpdateIndex( PIB *ppib, FUCB *pfucbTable, FUCB *pfucbIndex ) LOCAL ERR ErrFILEIPrepareOneIndex( PIB * const ppib, FUCB * const pfucbTable, - FUCB ** ppfucbIdx, + FCBRef * poutFcbRefIdx, JET_INDEXCREATE3_A * const pidxcreate, const CHAR * const szIndexName, const CHAR * rgszColumns[], @@ -6580,6 +6589,7 @@ LOCAL ERR ErrFILEIPrepareOneIndex( const IFMP ifmp = pfucbTable->ifmp; FCB * const pfcb = pfucbTable->u.pfcb; FCB * pfcbIdx = pfcbNil; + FCBRef fcbRefIdx; PGNO pgnoIndexFDP; OBJID objidIndex; FIELD * pfield; @@ -6882,7 +6892,6 @@ LOCAL ERR ErrFILEIPrepareOneIndex( // DIRBeforeFirst( pfucbTable ); - Assert( pfucbNil == *ppfucbIdx ); Assert( pfcbIdx == pfcbNil ); } else @@ -6905,10 +6914,8 @@ LOCAL ERR ErrFILEIPrepareOneIndex( // get pfcb of index directory // - Call( ErrDIROpen( ppib, pgnoIndexFDP, ifmp, ppfucbIdx ) ); - Assert( *ppfucbIdx != pfucbNil ); - Assert( !FFUCBVersioned( *ppfucbIdx ) ); // Verify won't be deferred closed. 
- pfcbIdx = (*ppfucbIdx)->u.pfcb; + Call( ErrFILEFcbGet( ppib, ifmp, pgnoIndexFDP, objidIndex, fcbRefIdx ) ); + pfcbIdx = fcbRefIdx.get(); Assert( !pfcbIdx->FInitialized() ); Assert( pfcbIdx->Pidb() == pidbNil ); @@ -6962,10 +6969,6 @@ LOCAL ERR ErrFILEIPrepareOneIndex( pfcbIdx->Lock(); pfcbIdx->CreateCompleteErr( errFCBUnusable ); pfcbIdx->Unlock(); - - // verify that the FUCB will not be defer-closed - - Assert( !FFUCBVersioned( *ppfucbIdx ) ); } goto HandleError; } @@ -6985,6 +6988,9 @@ LOCAL ERR ErrFILEIPrepareOneIndex( rgidxsegConditional, pspacehints ) ); + // Return fcb to caller + *poutFcbRefIdx = std::move( fcbRefIdx ); + HandleError: if ( fCleanupIDB ) { @@ -7055,6 +7061,7 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( FUCB * pfucb = pfucbNil; FUCB * pfucbIdx = pfucbNil; FCB * const pfcb = pfucbTable->u.pfcb; + FCBRef fcbRefIdx; FCB * pfcbIdx = pfcbNil; IDB idb( pinst ); CHAR szIndexName[ JET_cbNameMost+1 ]; @@ -7212,7 +7219,7 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( Call( ErrFILEIPrepareOneIndex( ppib, pfucb, - &pfucbIdx, + &fcbRefIdx, pidxcreate, szIndexName, rgszColumns, @@ -7223,14 +7230,14 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( if ( fPrimary ) { Assert( pfucbNil == pfucbIdx ); + Assert( pfcbNil == fcbRefIdx.get() ); Assert( pfcbNil == pfcbIdx ); } else { - Assert( pfucbNil != pfucbIdx ); - pfcbIdx = pfucbIdx->u.pfcb; + Assert( pfcbNil != fcbRefIdx.get() ); + pfcbIdx = fcbRefIdx.get(); - Assert( pfcbNil != pfcbIdx ); Assert( pfcbIdx->FTypeSecondaryIndex() ); if ( pidxcreate->grbit & JET_bitIndexEmpty ) @@ -7277,6 +7284,7 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( const IDBFLAG idbflagPersisted = idb.FPersistedFlags(); const IDXFLAG idbflagPersistedX = idb.FPersistedFlagsX(); + Call( ErrDIROpen( ppib, pfcbIdx, &pfucbIdx ) ); FUCBSetIndex( pfucbIdx ); FUCBSetSecondary( pfucbIdx ); @@ -7463,15 +7471,17 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( pfcbIdx->Pidb()->FPersistedFlags(), pfcbIdx->Pidb()->FPersistedFlagsX() ) ); } - } - // FCB now linked into table's index 
list, which guarantees that - // it will be available at Commit/Rollback time, so we can dispose - // of the index cursor. - Assert( !FFUCBVersioned( pfucbIdx ) ); // no versioned operations should have occurred on this cursor - Assert( pfucbNil != pfucbIdx ); - DIRClose( pfucbIdx ); - pfucbIdx = pfucbNil; + // FCB now linked into table's index list, which guarantees that + // it will be available at Commit/Rollback time, so we can dispose + // of the index cursor. + Assert( pfucbNil != pfucbIdx ); + Assert( !FFUCBVersioned( pfucbIdx ) ); // no versioned operations should have occurred on this cursor + DIRClose( pfucbIdx ); + pfucbIdx = pfucbNil; + fcbRefIdx.reset(); + pfcbIdx = pfcbNil; + } } Call( ErrDIRCommitTransaction( ppib, ( pidxcreate->grbit & JET_bitIndexLazyFlush ) ? JET_bitCommitLazyFlush : 0 ) ); @@ -7509,6 +7519,7 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( DIRClose( pfucbIdx ); } + fcbRefIdx.reset(); // rollback has to purge the fcb, no refs should remain CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); DIRClose( pfucb ); @@ -7578,7 +7589,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( BOOL fInTransaction = fFalse; BOOL fLazyCommit = fTrue; ULONG iindex; - FUCB ** rgpfucbIdx = NULL; + FCBRef* rgFcbRef = NULL; JET_INDEXCREATE3_A *pidxcreateT = NULL; JET_INDEXCREATE3_A *pidxcreateNext = NULL; @@ -7633,11 +7644,10 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( return JET_errSuccess; } - AllocR( rgpfucbIdx = (FUCB **)PvOSMemoryHeapAlloc( sizeof(FUCB *) * cIndexes ) ); - memset( rgpfucbIdx, 0, sizeof(FUCB *) * cIndexes ); + AllocR( rgFcbRef = new FCBRef[ cIndexes ] ); // Temporarily open new table cursor. 
- CallJ( ErrDIROpen( ppib, pfcb, &pfucb ), Cleanup ); + Call( ErrDIROpen( ppib, pfcb, &pfucb ) ); FUCBSetIndex( pfucb ); FUCBSetMayCacheLVCursor( pfucb ); @@ -7711,7 +7721,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( Call( ErrFILEIPrepareOneIndex( ppib, pfucb, - &rgpfucbIdx[iindex], + &rgFcbRef[ iindex ], pidxcreateT, szIndexName, rgszColumns, @@ -7719,7 +7729,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( &idb, &jsphIndex ) ); - pfcbIndexT = rgpfucbIdx[iindex]->u.pfcb; + pfcbIndexT = rgFcbRef[ iindex ].get(); Assert( pfcbIndexT->FTypeSecondaryIndex() ); pfcbIndexT->SetPfcbNextIndex( pfcbIndexes ); pfcbIndexT->SetPfcbTable( pfcb ); @@ -7736,10 +7746,10 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( pfcb->EnterDDL(); - Assert( pfcbNil == rgpfucbIdx[0]->u.pfcb->PfcbNextIndex() ); + Assert( pfcbNil == rgFcbRef[ 0 ]->PfcbNextIndex() ); Assert( cIndexes > 0 ); - Assert( pfcbIndexes == rgpfucbIdx[cIndexes-1]->u.pfcb ); - rgpfucbIdx[0]->u.pfcb->SetPfcbNextIndex( pfcb->PfcbNextIndex() ); + Assert( pfcbIndexes == rgFcbRef[ cIndexes - 1 ].get() ); + rgFcbRef[ 0 ]->SetPfcbNextIndex( pfcb->PfcbNextIndex() ); pfcb->SetPfcbNextIndex( pfcbIndexes ); FILESetAllIndexMask( pfcb ); @@ -7748,10 +7758,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( for ( iindex = 0; iindex < cIndexes; iindex++ ) { - FUCB * const pfucbIndexT = rgpfucbIdx[iindex]; - Assert( pfucbNil != pfucbIndexT ); - - FCB * const pfcbIndexT = pfucbIndexT->u.pfcb; + FCB* const pfcbIndexT = rgFcbRef[ iindex ].get(); Assert( pfcbNil != pfcbIndexT ); Assert( pfcbIndexT->FTypeSecondaryIndex() ); @@ -7759,11 +7766,9 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( pfcbIndexT->Lock(); pfcbIndexT->ResetDontLogSpaceOps(); pfcbIndexT->Unlock(); - - Assert( !FFUCBVersioned( pfucbIndexT ) ); // No versioned operations should have been performed, so won't be defer-closed. - DIRClose( pfucbIndexT ); } + delete[] rgFcbRef; Call( ErrDIRCommitTransaction( ppib, fLazyCommit ? 
JET_bitCommitLazyFlush : 0 ) ); fInTransaction = fFalse; @@ -7775,30 +7780,16 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( { Assert( err < 0 ); // Must have hit an error. - if ( NULL != rgpfucbIdx ) - { - for ( iindex = 0; iindex < cIndexes; iindex++ ) - { - if ( pfucbNil != rgpfucbIdx[iindex] ) - { - Assert( !FFUCBVersioned( rgpfucbIdx[iindex] ) ); // No versioned operations should have been performed, so won't be defer-closed. - DIRClose( rgpfucbIdx[iindex] ); - } - } - } - + delete[] rgFcbRef; CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); } - DIRClose( pfucb ); - AssertDIRNoLatch( ppib ); - -Cleanup: - if ( NULL != rgpfucbIdx ) + if ( pfucb != pfucbNil ) { - OSMemoryHeapFree( rgpfucbIdx ); + DIRClose( pfucb ); } + AssertDIRNoLatch( ppib ); return err; } @@ -8568,6 +8559,7 @@ ERR ErrFILEDeleteTable( PIB *ppib, IFMP ifmp, const CHAR *szName, const BOOL fAl FUCB *pfucb = pfucbNil; FUCB *pfucbParent = pfucbNil; FCB *pfcb = pfcbNil; + FCBRef fcbRefParent; OBJID objidTable; CHAR szTable[JET_cbNameMost+1]; BOOL fInUseBySystem; @@ -8616,7 +8608,8 @@ ERR ErrFILEDeleteTable( PIB *ppib, IFMP ifmp, const CHAR *szName, const BOOL fAl // open cursor on database and seek to table without locking // - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbParent ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefParent ) ); + Call( ErrDIROpen( ppib, fcbRefParent.get(), &pfucbParent ) ); { JET_GRBIT grbitOpen = JET_bitTableDelete | JET_bitTableDenyRead; diff --git a/dev/ese/src/ese/fileopen.cxx b/dev/ese/src/ese/fileopen.cxx index d726dc61..882fc15f 100644 --- a/dev/ese/src/ese/fileopen.cxx +++ b/dev/ese/src/ese/fileopen.cxx @@ -1420,12 +1420,14 @@ ERR ErrFILEIOpenTable( ERR err; ERR wrnSurvives = JET_errSuccess; FUCB *pfucb = pfucbNil; - FCB *pfcb; + FCB *pfcb = pfcbNil; + FCBRef fcbRef; CHAR szTable[JET_cbNameMost+1]; PGNO pgnoFDP = pgnoNull; OBJID objidTable = objidNil; BOOL fInTransaction = fFalse; BOOL fInitialisedCursor = fFalse; + BOOL 
fAddlInitDuringRecovery = fFalse; TABLECLASS tableclass = tableclassNone; Assert( ppib != ppibNil ); @@ -1617,7 +1619,8 @@ ERR ErrFILEIOpenTable( Assert( objidNil != objidTable ); Assert( objidTable > objidSystemRoot ); - Call( ErrDIROpenNoTouch( ppib, ifmp, pgnoFDP, objidTable, fTrue, &pfucb, fTrue ) ); + Call( ErrFILEFcbGetNoTouch( ppib, ifmp, pgnoFDP, objidTable, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); pfcb = pfucb->u.pfcb; @@ -1678,8 +1681,20 @@ ERR ErrFILEIOpenTable( // if we're opening after table creation, the FCB shouldn't be initialised Assert( !( grbit & JET_bitTableCreate ) || !pfcb->FInitialized() ); - // Only one thread could possibly get to this point with an uninitialized - // FCB, which is why we don't have to grab the FCB's critical section. + if ( pfcb->FInitedForRecovery() ) + { + // If we find a partially initialized FCB (created by redo), we need to fully initialize it. + // Acquire additional init flag, which would only allow 1 thread at a time through for initialization. + // The first thread through will go in to the init block below. + Assert( pfcb->FInitialized() ); // FCB is in a quasi-initialized state + + pfcb->AcquireAdditionalInitDuringRecovery(); + fAddlInitDuringRecovery = fTrue; + } + + // Only one thread (the one that created a new FCB) could possibly get to this point with an uninitialized FCB, + // because FCB::PFcbGet() doesn't return uninitialized FCBs (it spin-waits for them to be initialized). + // Which is why we don't have to grab the FCB's critical section. 
if ( !pfcb->FInitialized() || pfcb->FInitedForRecovery() ) { if ( fInTransaction ) @@ -1693,12 +1708,12 @@ ERR ErrFILEIOpenTable( switch ( ttSubject ) { case tt::System: - Call( ErrCATInitCatalogFCB( pfucb ) ); + Call( ErrCATInitCatalogFCB( ppib, pfcb ) ); break; case tt::Temp: Assert( !( grbit & JET_bitTableDelete ) ); - Call( ErrCATInitTempFCB( pfucb ) ); + Call( ErrCATInitTempFCB( ppib, pfcb ) ); break; case tt::ExtentPageCountCache: @@ -1709,7 +1724,7 @@ ERR ErrFILEIOpenTable( // initialize the table's FCB // - Call( ErrCATInitFCB( pfucb, objidTable, !( grbit & JET_bitAllowPgnoFDPLastSetTime ) ) ); + Call( ErrCATInitFCB( ppib, pfcb, objidTable, !( grbit & JET_bitAllowPgnoFDPLastSetTime ) ) ); const ULONG cPageReadAfter = Ptls()->threadstats.cPageRead; const ULONG cPagePrereadAfter = Ptls()->threadstats.cPagePreread; @@ -1776,6 +1791,12 @@ ERR ErrFILEIOpenTable( pfcb->CreateComplete(); pfcb->ResetInitedForRecovery(); + if ( fAddlInitDuringRecovery ) + { + pfcb->ReleaseAdditionalInitDuringRecovery(); + fAddlInitDuringRecovery = fFalse; + } + err = ErrFILEICheckAndSetMode( pfucb, grbit ) + ErrFaultInjection( 38304 ); if ( err >= JET_errSuccess ) @@ -1967,6 +1988,20 @@ ERR ErrFILEIOpenTable( return err; HandleError: + if ( fAddlInitDuringRecovery ) + { + Assert( err < JET_errSuccess ); + pfcb->Lock(); + + // Error during init means that we should be leaving the FCB in a semi-initialized state, + // the same state we initially encountered. + // The next thread to acquire addl init flag will re-attempt initialization. + EnforceSz( pfcb->FInitedForRecovery(), "FCBAddlInit_BadState" ); + + pfcb->ReleaseAdditionalInitDuringRecovery(); + pfcb->Unlock(); + } + Assert( pfucbNil != pfucb || !fInitialisedCursor ); if ( pfucbNil != pfucb ) { @@ -1980,6 +2015,8 @@ ERR ErrFILEIOpenTable( } } + fcbRef.reset(); // do we need to release the FCB before trx rollback? 
+ if ( fInTransaction ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); @@ -2285,6 +2322,132 @@ VOID FILETableMustRollback( PIB *ppib, FCB *pfcbTable ) } +// Latches pgnoFDP and reads objidFDP off of the PGHDR. +LOCAL ERR ErrFILEIGetObjidFromPgnoFDP( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID* pobjidFDP ) +{ + ERR err; + + if ( pgnoFDP == pgnoSystemRoot ) + { + *pobjidFDP = objidSystemRoot; + return JET_errSuccess; + } + else if ( FCATSystemTable( pgnoFDP ) ) + { + *pobjidFDP = ObjidCATTable( pgnoFDP ); + return JET_errSuccess; + } + else + { + CSR csr; + CallR( csr.ErrGetReadPage( ppib, ifmp, pgnoFDP, bflfDefault ) ); + + *pobjidFDP = csr.Cpage().ObjidFDP(); + Assert( *pobjidFDP != objidNil ); + csr.ReleasePage(); + return JET_errSuccess; // clobber warnings + } +} + + +// Creates a new FCB, and adds it to the FCB hash and the inst's FCB list. +// Sets the following essential properties on the FCB: +// - objidFDP +// Optional properties are set later in the FCB lifetime. They include: +// - Schema/metadata info (rec info, index info, unique/non-unique etc), set by ErrFILEIInitializeFCB(). +// - Space header (pgnoOE, pgnoAE), by ErrBTOpen(), or deferred initialized by space (in case of ErrBTOpenNoTouch()). +// Note that the FCB is considered ill-formed without the essential properties set. +// But may be usable in some limited form without the optional properties set (e.g. during recovery). +ERR ErrFILEIOpenFCB( + PIB *ppib, + IFMP ifmp, + PGNO pgnoFDP, + OBJID objidFDP, + OPENTYPE opentype, + FCBRef& fcbRef ) +{ + ERR err = JET_errSuccess; + FCB *pfcb = pfcbNil; + + // We are creating an FCB for a new objid, its catalog entry or pgnoFDP may not be initialized yet. + if ( opentype != openNew ) + { + if ( objidFDP == objidNil ) + { + // System tables have constant objids. + // There is no reason for the caller not to pass them in (except repair that may not have enough context). 
+ Assert( !FCATBaseSystemFDP( pgnoFDP ) || g_fRepair ); + Assert( opentype != openNormalNoTouch ); // we need to latch the pgnoFDP here + + Call( ErrFILEIGetObjidFromPgnoFDP( ppib, ifmp, pgnoFDP, &objidFDP ) ); + } + } + + // Create a new FCB and add to FCB hash. + // Only 1 thread gets past this point with JET_errSuccess. + Call( FCB::ErrCreate( ppib, ifmp, pgnoFDP, &pfcb ) ); + + // the creation was successful + + Assert( pfcb->IsLocked() ); + Assert( pfcb->FTypeNull() ); // No fcbtype yet. + Assert( pfcb->Ifmp() == ifmp ); + Assert( pfcb->PgnoFDP() == pgnoFDP ); + Assert( !pfcb->FInitialized() ); + Assert( !pfcb->FSpaceInitialized() ); + Assert( pfcb->WRefCount() == 0 ); + + pfcb->SetObjidFDP( objidFDP ); + + // Increment refcount and set guard object. + pfcb->IncrementRefCount( fTrue /* fOwnWriteLock */ ); + fcbRef.reset( pfcb ); + + pfcb->Unlock(); + + if ( pgnoFDP == pgnoSystemRoot ) + { + // SPECIAL CASE: For database cursor, we've got all the + // information we need. + + // when opening db cursor, always force to check the root page + Assert( objidNil == objidFDP || objidSystemRoot == pfcb->ObjidFDP() ); + + pfcb->Lock(); + pfcb->SetTypeDatabase(); + pfcb->CreateComplete(); // fcb is marked as initialized + pfcb->Unlock(); + + // insert this FCB into the global list, as it is fully initialized + pfcb->InsertList(); + } + + // Initialize space properties. + // Some codepaths require space properties to be cached on the FCB right after creation. + + if ( opentype == openNormal ) + { + Call( ErrSPInitFCB( ppib, pfcb ) ); + } + else + { + Assert( opentype == openNew || opentype == openNormalNoTouch ); + } + + // Finish creating this FCB, non-database FCBs are not fully initialized yet. + // Caller must deal with the rest of the initialization sequence. + Assert( pfcb->WRefCount() == 1 ); + return err; + +HandleError: + // FCB creation can't race with anything else if we are creating a new object. 
+ Assert( err != errFCBExists || opentype != openNew ); + + fcbRef.reset(); // return NULL + return err; +} + + ERR ErrFILEIInitializeFCB( PIB *ppib, IFMP ifmp, @@ -2311,6 +2474,7 @@ ERR ErrFILEIInitializeFCB( pfcbNew->SetPtdb( ptdb ); pfcbNew->Lock(); pfcbNew->SetPrimaryIndex(); + pfcbNew->SetUnique(); // primary index is always unique Assert( !pfcbNew->FSequentialIndex() ); if ( pidbNil == pidb ) { @@ -2329,6 +2493,9 @@ ERR ErrFILEIInitializeFCB( pfcbNew->Lock(); pfcbNew->SetTypeSecondaryIndex(); + + Assert( pidb != NULL ); + pidb->FUnique() ? pfcbNew->SetUnique() : pfcbNew->SetNonUnique(); pfcbNew->Unlock(); } @@ -2398,6 +2565,77 @@ ERR ErrFILEIInitializeFCB( } +LOCAL ERR ErrFILEIFcbGet( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, OPENTYPE opentype, FCBRef& fcbRef ) +{ + ERR err = JET_errSuccess; + FCBStateFlags fcbsf; + ULONG cRetries = 0; + +RetrieveFCB: + AssertTrack( cRetries != 100000, "TooManyFcbOpenRetries" ); + + // get the FCB for the given ifmp/pgnoFDP + + fcbRef.reset( FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* fIncrementRefcount */ ) ); + if ( fcbRef.get() == pfcbNil ) + { + // the FCB does not exist + + Assert( fcbsfNone == fcbsf ); + + // try to create a new FCB + + err = ErrFILEIOpenFCB( ppib, ifmp, pgnoFDP, objidFDP, opentype, fcbRef ); + Assert( err <= JET_errSuccess ); // Shouldn't return warnings. + + if ( err == errFCBExists ) + { + + // we failed because someone else was racing to create + // the same FCB that we want, but they beat us to it + + // try to get the FCB again + + err = JET_errSuccess; + UtilSleep( 10 ); + cRetries++; + goto RetrieveFCB; + } + Call( err ); + } + else + { + if ( fcbsf & fcbsfInitialized ) + { + Assert( fcbRef->WRefCount() >= 1); + } + else + { + FireWall( "DeprecatedSentinelFcbBtOpen" ); // Sentinel FCBs are believed deprecated + Assert( !FFMPIsTempDB( ifmp ) ); // Sentinels not used by sort/temp. tables. 
+ + // If we encounter a sentinel, it means the + // table has been locked for subsequent deletion. + fcbRef.reset(); + err = ErrERRCheck( JET_errTableLocked ); + } + } + +HandleError: + return err; +} + +ERR ErrFILEFcbGet( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ) +{ + return ErrFILEIFcbGet( ppib, ifmp, pgnoFDP, objidFDP, openNormal, pfcbRef ); +} + +ERR ErrFILEFcbGetNoTouch( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ) +{ + return ErrFILEIFcbGet( ppib, ifmp, pgnoFDP, objidFDP, openNormalNoTouch, pfcbRef ); +} + + INLINE VOID RECIForceTaggedColumnsAsDerived( const TDB * const ptdb, DATA& dataDefault ) diff --git a/dev/ese/src/ese/lv.cxx b/dev/ese/src/ese/lv.cxx index ac45a0a6..dcb20912 100644 --- a/dev/ese/src/ese/lv.cxx +++ b/dev/ese/src/ese/lv.cxx @@ -519,12 +519,14 @@ INLINE ERR ErrFILEIInitLVRoot( FUCB *pfucb, const PGNO pgnoLV, FUCB **ppfucbLV ) ERR err; FCB * const pfcbTable = pfucb->u.pfcb; FCB * pfcbLV; + FCBRef fcbRefLV; // Link LV FCB into table. - CallR( ErrDIROpen( pfucb->ppib, pgnoLV, pfucb->ifmp, ppfucbLV, fTrue ) ); + CallR( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoLV, objidNil, fcbRefLV ) ); + CallR( ErrDIROpen( pfucb->ppib, fcbRefLV.get(), ppfucbLV ) ); Assert( *ppfucbLV != pfucbNil ); Assert( !FFUCBVersioned( *ppfucbLV ) ); // Verify won't be deferred closed. 
- pfcbLV = (*ppfucbLV)->u.pfcb; + pfcbLV = fcbRefLV.get(); Assert( !pfcbLV->FInitialized() || pfcbLV->FInitedForRecovery() ); Assert( pfcbLV->Ifmp() == pfucb->ifmp ); @@ -534,6 +536,7 @@ INLINE ERR ErrFILEIInitLVRoot( FUCB *pfucb, const PGNO pgnoLV, FUCB **ppfucbLV ) // Recovery creates all FCBs as table FCB, now that we know better, we need to remove from list of table FCBs // before we mark FCB as being a LV FCB + pfcbLV->AcquireAdditionalInitDuringRecovery(); // allows only 1 thread through at a time if ( pfcbLV->FInitedForRecovery() ) { pfcbLV->RemoveList(); @@ -558,10 +561,12 @@ INLINE ERR ErrFILEIInitLVRoot( FUCB *pfucb, const PGNO pgnoLV, FUCB **ppfucbLV ) } // finish the initialization of this LV FCB + // an initialized fcb isn't purged by the FCBRef deleter pfcbLV->Lock(); pfcbLV->CreateComplete(); pfcbLV->ResetInitedForRecovery(); + pfcbLV->ReleaseAdditionalInitDuringRecovery(); pfcbLV->Unlock(); // WARNING: publishing the FCB in the TDB *must* diff --git a/dev/ese/src/ese/repair.cxx b/dev/ese/src/ese/repair.cxx index f744915e..12b2fea0 100644 --- a/dev/ese/src/ese/repair.cxx +++ b/dev/ese/src/ese/repair.cxx @@ -8216,6 +8216,7 @@ LOCAL ERR ErrREPAIRRepairGlobalSpace( const PGNO pgnoLast = PgnoLast( ifmp ); const CPG cpgOwned = PgnoLast( ifmp ) - 3; // we will insert three pages in the ErrSPCreate below + FCBRef fcbRef; FUCB *pfucb = pfucbNil; FUCB *pfucbOE = pfucbNil; @@ -8242,7 +8243,8 @@ LOCAL ERR ErrREPAIRRepairGlobalSpace( // So, don't bother adding the (objidFDP, cpgOEFDP, cpgAEFDP) triplet to the // cache. 
- Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); // The tree has only one node so we can insert ths node without splitting Call( ErrSPIOpenOwnExt( pfucb, &pfucbOE ) ); @@ -8330,6 +8332,7 @@ LOCAL ERR ErrREPAIRDeleteCorruptedEntriesFromCatalog( // ================================================================ { ERR err = JET_errSuccess; + FCBRef fcbRefCatalog; FUCB * pfucbCatalog = pfucbNil; ENTRYINFO entryinfo; @@ -8342,7 +8345,8 @@ LOCAL ERR ErrREPAIRDeleteCorruptedEntriesFromCatalog( CallR( ErrDIRBeginTransaction( ppib, 64549, NO_GRBIT ) ); - Call( ErrDIROpen( ppib, pgnoFDPMSO, ifmp, &pfucbCatalog ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPMSO, objidFDPMSO, fcbRefCatalog ) ); + Call( ErrDIROpen( ppib, fcbRefCatalog.get(), &pfucbCatalog ) ); Assert( pfucbNil != pfucbCatalog ); FUCBSetIndex( pfucbCatalog ); @@ -8433,6 +8437,8 @@ LOCAL ERR ErrREPAIRDeleteCorruptedEntriesFromCatalog( DIRClose( pfucbCatalog ); } + fcbRefCatalog.reset(); // do we need to release the FCB before trx rollback? 
+ if ( JET_errSuccess != err ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); @@ -8481,6 +8487,7 @@ LOCAL ERR ErrREPAIRRepairCatalogs( { ERR err = JET_errSuccess; + FCBRef fcbRefParent; FUCB * pfucbParent = pfucbNil; FUCB * pfucbCatalog = pfucbNil; FUCB * pfucbShadowCatalog = pfucbNil; @@ -8493,7 +8500,8 @@ LOCAL ERR ErrREPAIRRepairCatalogs( if ( fCatalogCorrupt || fShadowCatalogCorrupt ) { // we'll need this for the space - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbParent ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefParent ) ); + Call( ErrDIROpen( ppib, fcbRefParent.get(), &pfucbParent ) ); } if ( fCatalogCorrupt && fShadowCatalogCorrupt ) @@ -8996,6 +9004,7 @@ LOCAL ERR ErrREPAIRCopyTempTableToCatalog( JET_ERR err = JET_errSuccess; + FCBRef fcbRefParent; FUCB * pfucbParent = pfucbNil; FUCB * pfucbCatalog = pfucbNil; FUCB * pfucbSpace = pfucbNil; @@ -9007,7 +9016,8 @@ LOCAL ERR ErrREPAIRCopyTempTableToCatalog( VOID * pvData = NULL; BFAlloc( bfasIndeterminate, &pvData ); - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbParent ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefParent ) ); + Call( ErrDIROpen( ppib, fcbRefParent.get(), &pfucbParent ) ); Assert( pfucbNil != pfucbParent ); // when we create this we cannot make all the pages available, some will be needed later @@ -9651,10 +9661,12 @@ LOCAL ERR ErrREPAIRCreateEmptyFDP( const CPG cpgMin = cpgMultipleExtentMin; CPG cpgRequest = cpgMin; + FCBRef fcbRef; FUCB * pfucb = pfucbNil; // the fucb is used to get an extent from the parent - Call( ErrDIROpen( ppib, pgnoParent, ifmp, &pfucb ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoParent, objidNil, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); if ( pgnoNull == *ppgnoFDPNew ) { Call( ErrSPGetExt( @@ -9852,6 +9864,7 @@ LOCAL ERR ErrREPAIRRebuildSpace( CPG cpgRun = 0; FUCB *pfucbOE = pfucbNil; + FCBRef fcbRefParent; FUCB *pfucbParent = pfucbNil; const OBJID 
objidFDP = pfucb->u.pfcb->ObjidFDP(); @@ -9883,7 +9896,8 @@ LOCAL ERR ErrREPAIRRebuildSpace( Assert( pgnoNull != pgnoParent ); if ( pgnoNull != pgnoParent ) { - Call( ErrBTOpen( ppib, pgnoParent, ifmp, &pfucbParent ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoParent, objidNil, fcbRefParent ) ); + Call( ErrBTOpen( ppib, fcbRefParent.get(), &pfucbParent ) ); Assert( pfucbNil != pfucbParent ); Assert( pfcbNil != pfucbParent->u.pfcb ); Assert( pfucbParent->u.pfcb->FInitialized() ); @@ -10313,6 +10327,7 @@ LOCAL ERR ErrREPAIRFixLVs( { ERR err = JET_errSuccess; + FCBRef fcbRef; FUCB * pfucb = pfucbNil; BOOL fDone = fFalse; LvId lidCurr; @@ -10327,7 +10342,8 @@ LOCAL ERR ErrREPAIRFixLVs( (*popts->pcprintfVerbose)( "fixing long value tree\r\n" ); - Call( ErrDIROpen( ppib, pgnoLV, ifmp, &pfucb ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoLV, prepairtable->objidLV, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); // Make sure the LV FCB is properly linked to the Table FCB, needed later to look up chunk-size Assert( pfucb->u.pfcb->PfcbTable() != pfcbNil ); @@ -10408,7 +10424,7 @@ LOCAL ERR ErrREPAIRFixLVs( DIRUp( pfucb ); } - Call( ErrDIROpen( ppib, pgnoLV, ifmp, &pfucbLVRoot ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucbLVRoot ) ); (*popts->pcprintfVerbose)( "long value 0x%I64x has no root. 
creating a root with refcount %d and size %d\r\n", (_LID64)lidCurr, ulRefcount, ulSize ); @@ -10964,6 +10980,7 @@ LOCAL ERR ErrREPAIRFixRecords( { ERR err = JET_errSuccess; + FCBRef fcbRef; FUCB * pfucb = pfucbNil; INT crecordDeleted = 0; @@ -10975,7 +10992,9 @@ LOCAL ERR ErrREPAIRFixRecords( (*popts->pcprintfVerbose)( "fixing records\r\n" ); - Call( ErrDIROpen( ppib, pgnoFDP, ifmp, &pfucb ) ); + Assert( objidNil != prepairtable->objidFDP ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, prepairtable->objidFDP, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); FUCBSetIndex( pfucb ); @@ -11090,6 +11109,7 @@ LOCAL ERR ErrREPAIRFixLVRefcounts( //- { ERR err = JET_errSuccess; + FCBRef fcbRef; FUCB * pfucb = pfucbNil; (*popts->pcprintfVerbose)( "fixing long value refcounts\r\n" ); @@ -11102,7 +11122,8 @@ LOCAL ERR ErrREPAIRFixLVRefcounts( } Call( pttmapLVTree->ErrMoveFirst() ); - Call( ErrDIROpen( ppib, pgnoLV, ifmp, &pfucb ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoLV, prepairtable->objidLV, fcbRef ) ); + Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); // Mark the fcb as being an LV; (see ErrFILEIInitLVRoot for // other initialization that isn't needed) diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index d218b3a4..a23135da 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -2338,11 +2338,13 @@ ERR ErrSPGetLastPgno( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ PGNO * ppg ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTINFO * pextinfo ) { ERR err; + FCBRef fcbRef; FUCB *pfucb = pfucbNil; FUCB *pfucbOE = pfucbNil; DIB dib; - CallR( ErrBTOpen( ppib, pgnoSystemRoot, ifmp, &pfucb, openNormal, fTrue ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); Assert( pfucbNil != pfucb ); PIBTraceContextScope tcScope = pfucb->ppib->InitTraceContextScope(); @@ -2355,14 +2357,23 @@ ERR 
ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTIN Assert( PinstFromPpib( ppib )->FRecovering() ); Assert( pfucb->u.pfcb->FInitedForRecovery() ); + pfucb->u.pfcb->AcquireAdditionalInitDuringRecovery(); + // pgnoOE and pgnoAE need to be obtained // - Call( ErrSPInitFCB( pfucb ) ); + err = ErrSPInitFCB( ppib, fcbRef.get() ); pfucb->u.pfcb->Lock(); - pfucb->u.pfcb->ResetInitedForRecovery(); + if ( err >= JET_errSuccess ) + { + pfucb->u.pfcb->ResetInitedForRecovery(); + } + + pfucb->u.pfcb->ReleaseAdditionalInitDuringRecovery(); pfucb->u.pfcb->Unlock(); + Call( err ); } + Assert( pfucb->u.pfcb->FSpaceInitialized() ); Assert( pfucb->u.pfcb->PgnoOE() != pgnoNull ); @@ -2411,14 +2422,39 @@ ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTIN } +LOCAL ERR ErrSPIOpenAndGotoRoot( PIB* ppib, FCB* pfcb, FUCB** ppfucb ) +{ + ERR err; + FUCB* pfucb; + + CallR( ErrBTOpen( ppib, pfcb, &pfucb ) ); + Assert( pfucbNil != pfucb ); + Assert( pfcbNil != pfucb->u.pfcb ); + Assert( pfucb->u.pfcb->FInitialized() ); + + err = ErrBTIGotoRoot( pfucb, latchRIW ); + if ( err < JET_errSuccess ) + { + BTClose( pfucb ); + } + else + { + Assert( latchRIW == Pcsr( pfucb )->Latch() ); + Assert( pcsrNil == pfucb->pcsrRoot ); + pfucb->pcsrRoot = Pcsr( pfucb ); + + *ppfucb = pfucb; + } + + return err; +} + + // Validate I have not unintentionally changed SPACE_HEADER size. C_ASSERT( sizeof(SPACE_HEADER) == 16 ); -LOCAL VOID SPIInitFCB( FUCB * pfucb, const BOOL fDeferredInit ) +LOCAL VOID SPIInitFCB( FCB * pfcb, CSR* pcsr ) { - CSR * pcsr = ( fDeferredInit ? 
pfucb->pcsrRoot : Pcsr( pfucb ) ); - FCB * pfcb = pfucb->u.pfcb; - Assert( pcsr->FLatched() ); // need to acquire FCB lock because that's what protects the Flags @@ -2428,9 +2464,10 @@ LOCAL VOID SPIInitFCB( FUCB * pfucb, const BOOL fDeferredInit ) { // get external header // - NDGetExternalHeader ( pfucb, pcsr, noderfSpaceHeader ); - Assert( sizeof( SPACE_HEADER ) == pfucb->kdfCurr.data.Cb() ); - const SPACE_HEADER * const psph = reinterpret_cast ( pfucb->kdfCurr.data.Pv() ); + KEYDATAFLAGS kdf; + NDGetExternalHeader ( &kdf, pcsr, noderfSpaceHeader ); + Assert( sizeof( SPACE_HEADER ) == kdf.data.Cb() ); + const SPACE_HEADER * const psph = reinterpret_cast ( kdf.data.Pv() ); if ( psph->FSingleExtent() ) { @@ -2444,13 +2481,10 @@ LOCAL VOID SPIInitFCB( FUCB * pfucb, const BOOL fDeferredInit ) Assert( pfcb->PgnoAE() == pfcb->PgnoOE() + 1 ); } - if ( !fDeferredInit ) + if ( pfcb->FInitialized() ) { - Assert( pfcb->FUnique() ); // FCB always initialised as unique - if ( psph->FNonUnique() ) - pfcb->SetNonUnique(); + Assert( !!psph->FNonUnique() == !pfcb->FUnique() ); } - Assert( !!psph->FNonUnique() == !pfcb->FUnique() ); pfcb->SetSpaceInitialized(); @@ -2465,22 +2499,21 @@ LOCAL VOID SPIInitFCB( FUCB * pfucb, const BOOL fDeferredInit ) // initializes FCB with pgnoAE and pgnoOE // -ERR ErrSPInitFCB( _Inout_ FUCB * const pfucb ) +ERR ErrSPInitFCB( _In_ PIB * ppib, _Inout_ FCB * const pfcb ) { ERR err; - FCB *pfcb = pfucb->u.pfcb; + CSR csr; - PIBTraceContextScope tcScope = pfucb->ppib->InitTraceContextScope(); - tcScope->nParentObjectClass = TceFromFUCB( pfucb ); + PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); + tcScope->nParentObjectClass = pfcb->TCE( fTrue ); tcScope->iorReason.SetIort( iortSpace ); tcScope->SetDwEngineObjid( pfcb->ObjidFDP() ); - Assert( !Pcsr( pfucb )->FLatched() ); - Assert( !FFUCBSpace( pfucb ) ); + Expected( !pfcb->FSpaceInitialized() ); // goto root page of tree // - err = ErrBTIGotoRoot( pfucb, latchReadTouch ); + err = 
csr.ErrGetReadPage( ppib, pfcb->Ifmp(), pfcb->PgnoFDP(), bflfDefault ); if ( err < 0 ) { if ( g_fRepair ) @@ -2496,15 +2529,12 @@ ERR ErrSPInitFCB( _Inout_ FUCB * const pfucb ) } else { - // get objidFDP from root page, FCB can only be set once + // objid from fcb and the page header must match - Assert( objidNil == pfcb->ObjidFDP() - || ( PinstFromIfmp( pfucb->ifmp )->FRecovering() && pfcb->ObjidFDP() == Pcsr( pfucb )->Cpage().ObjidFDP() ) ); - pfcb->SetObjidFDP( Pcsr( pfucb )->Cpage().ObjidFDP() ); - - SPIInitFCB( pfucb, fFalse ); - - BTUp( pfucb ); + Assert( pfcb->ObjidFDP() == csr.Cpage().ObjidFDP() ); + SPIInitFCB( pfcb, &csr ); + csr.ReleasePage(); + err = JET_errSuccess; // clobber warnings } return err; @@ -2527,18 +2557,14 @@ ERR ErrSPDeferredInitFCB( _Inout_ FUCB * const pfucb ) // goto root page of tree // - CallR( ErrBTIOpenAndGotoRoot( - pfucb->ppib, - pfcb->PgnoFDP(), - pfucb->ifmp, - &pfucbT ) ); + CallR( ErrSPIOpenAndGotoRoot( pfucb->ppib, pfcb, &pfucbT ) ); Assert( pfucbNil != pfucbT ); Assert( pfucbT->u.pfcb == pfcb ); Assert( pcsrNil != pfucbT->pcsrRoot ); if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfucbT, fTrue ); + SPIInitFCB( pfcb, pfucbT->pcsrRoot ); } SPIValidateCpgOwnedAndAvail( pfucbT ); @@ -3308,6 +3334,7 @@ ERR ErrSPCreate( CPG *pcpgAEFDP ) { ERR err; + FCBRef fcbRef; FUCB *pfucb = pfucbNil; const BOOL fUnique = !( fSPFlags & fSPNonUnique ); @@ -3377,11 +3404,9 @@ ERR ErrSPCreate( // table. Implement a fix to allow leaving the FCB in an uninitialized // state, then have it initialized by the subsequent DIR/BTOpen. 
// - CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb, openNew ) ); - - tcScope->nParentObjectClass = TceFromFUCB( pfucb ); + Call( ErrFILEIOpenFCB( ppib, ifmp, pgnoFDP, objidNil, openNew, fcbRef ) ); - FCB *pfcb = pfucb->u.pfcb; + FCB* pfcb = fcbRef.get(); Assert( pfcbNil != pfcb ); if ( pgnoSystemRoot == pgnoFDP ) @@ -3411,7 +3436,7 @@ ERR ErrSPCreate( } else { - Call( g_rgfmp[ pfucb->ifmp ].ErrObjidLastIncrementAndGet( pobjidFDP ) ); + Call( g_rgfmp[ ifmp ].ErrObjidLastIncrementAndGet( pobjidFDP ) ); } Assert( pgnoSystemRoot != pgnoFDP || objidSystemRoot == *pobjidFDP ); @@ -3426,6 +3451,9 @@ ERR ErrSPCreate( pfcb->SetSpaceInitialized(); pfcb->Unlock(); + CallR( ErrBTOpen( ppib, pfcb, &pfucb ) ); // can't latch root page, it doesn't exist yet + tcScope->nParentObjectClass = TceFromFUCB( pfucb ); + if ( fSPFlags & fSPMultipleExtent ) { Assert( PgnoFDP( pfucb ) == pgnoFDP ); @@ -3477,6 +3505,7 @@ ERR ErrSPCreate( Assert( !FFUCBVersioned( pfucb ) ); HandleError: + Assert( err != errFCBExists ); // creation of an FCB for a new objid can't conflict Assert( ( pfucb != pfucbNil ) || ( err < JET_errSuccess ) ); if ( pfucb != pfucbNil ) @@ -6046,7 +6075,7 @@ LOCAL ERR ErrSPIGetExt( if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfucbSrc, fTrue ); + SPIInitFCB( pfcb, pfucbSrc->pcsrRoot ); } // @@ -6392,6 +6421,7 @@ ERR ErrSPGetExt( FUCB *pfucbParent = pfucbNil; CPG cpgOEFDP; CPG cpgAEFDP; + FCBRef fcbRef; PIBTraceContextScope tcScope = pfucb->ppib->InitTraceContextScope(); tcScope->nParentObjectClass = TceFromFUCB( pfucb ); @@ -6404,14 +6434,15 @@ ERR ErrSPGetExt( // open cursor on Parent and RIW latch root page // - Call( ErrBTIOpenAndGotoRoot( pfucb->ppib, pgnoParentFDP, pfucb->ifmp, &pfucbParent ) ); + Call( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoParentFDP, objidNil, fcbRef ) ); + Call( ErrSPIOpenAndGotoRoot( pfucb->ppib, fcbRef.get(), &pfucbParent ) ); // allocate an extent // Note: We get back info on OE and AE so we can add the value to the cpg cache // AFTER 
we've released the root. This is because adding a value to the cpg cache // may cause a split in the cpg cache table, and that means we'd need to get space // from the DBRoot. If pgnoParentFDP happens to be systemRoot, that results in - // trying to latch the page twice, one in ErrBTIOpenAndGotoRoot and one several + // trying to latch the page twice, one in ErrSPIOpenAndGotoRoot and one several // levels lower in the callstack. err = ErrSPIGetExt( @@ -7355,7 +7386,7 @@ ERR ErrSPGetPage( if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfucb, fTrue ); + SPIInitFCB( pfcb, pfucb->pcsrRoot ); } // @@ -7479,7 +7510,7 @@ LOCAL ERR ErrSPIFreeSEToParent( // parent must always be in memory // - pfcbParent = FCB::PfcbFCBGet( pfucb->ifmp, pgnoParentFDP, &fcbsf, fTrue, fTrue ); + pfcbParent = FCB::PfcbFCBGet( pfucb->ifmp, pgnoParentFDP, &fcbsf, fTrue ); Assert( pfcbParent != pfcbNil ); Assert( fcbsf & fcbsfInitialized ); Assert( !pfcb->FTypeNull() ); @@ -7535,7 +7566,7 @@ LOCAL ERR ErrSPIFreeSEToParent( // if ( pfucbParentLocal == pfucbNil ) { - Call( ErrBTIOpenAndGotoRoot( pfucb->ppib, pgnoParentFDP, pfucb->ifmp, &pfucbParentLocal ) ); + Call( ErrSPIOpenAndGotoRoot( pfucb->ppib, pfcbParent, &pfucbParentLocal ) ); } else { @@ -8891,7 +8922,7 @@ ERR ErrSPFreeExt( FUCB* const pfucb, const PGNO pgnoFirst, const CPG cpgSize, co if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfucb, fTrue ); + SPIInitFCB( pfcb, pfucb->pcsrRoot ); } // @@ -9298,10 +9329,12 @@ ERR ErrSPShelvePage( PIB* const ppib, const IFMP ifmp, const PGNO pgno ) Assert( pfmp->FBeyondPgnoShrinkTarget( pgno ) ); Assert( pgno <= pfmp->PgnoLast() ); - FUCB* pfucbRoot = pfucbNil; - FUCB* pfucbAE = pfucbNil; + FUCB* pfucbRoot = pfucbNil; + FUCB* pfucbAE = pfucbNil; + FCBRef fcbRefSystemRoot; - Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); + Call( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), 
&pfucbRoot)); Call( ErrSPIOpenAvailExt( pfucbRoot, &pfucbAE ) ); Call( ErrSPIReserveSPBufPages( pfucbRoot ) ); @@ -9339,10 +9372,12 @@ ERR ErrSPShelvePage( PIB* const ppib, const IFMP ifmp, const PGNO pgno ) //- ERR ErrSPUnshelveShelvedPagesBelowEof( PIB* const ppib, const IFMP ifmp ) { - ERR err = JET_errSuccess; - FMP* const pfmp = g_rgfmp + ifmp; - FUCB* pfucbRoot = pfucbNil; - BOOL fInTransaction = fFalse; + ERR err = JET_errSuccess; + FMP* const pfmp = g_rgfmp + ifmp; + FUCB* pfucbRoot = pfucbNil; + FCBRef fcbRefSystemRoot; + BOOL fInTransaction = fFalse; + Assert( !pfmp->FIsTempDB() ); Expected( pfmp->FShrinkIsRunning() ); @@ -9352,7 +9387,8 @@ ERR ErrSPUnshelveShelvedPagesBelowEof( PIB* const ppib, const IFMP ifmp ) Call( ErrDIRBeginTransaction( ppib, 46018, NO_GRBIT ) ); fInTransaction = fTrue; - Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); + Call( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbRoot ) ); Call( ErrSPIUnshelvePagesInRange( pfucbRoot, 1, pfmp->PgnoLast() ) ); @@ -10020,6 +10056,7 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) PGNO pgnoFirstShelved = pgnoNull, pgnoLastShelved = pgnoNull; FUCB* pfucbCatalog = pfucbNil; FUCB* pfucbRoot = pfucbNil; + FCBRef fcbRefRoot; CSparseBitmap spbmOwned; CArray arrShelved; @@ -10037,6 +10074,8 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) Assert( ifmpDummy == ifmp ); fDbOpen = fTrue; + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefRoot ) ); + // Reclaiming leaked space requires a fully populated MSysObjids table because // that is how we enumerate all the tables efficiently. 
{ @@ -10200,7 +10239,7 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) Call( ErrDIRBeginTransaction( ppib, 37218, NO_GRBIT ) ); fInTransaction = fTrue; - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); + Call( ErrDIROpen( ppib, fcbRefRoot.get(), &pfucbRoot ) ); CSPExtentInfo spext; err = ErrSPIFindExtOE( ppib, pfucbRoot->u.pfcb, pgnoFirstToReclaim, &spext ); @@ -10319,7 +10358,7 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) Call( ErrDIRBeginTransaction( ppib, 51150, NO_GRBIT ) ); fInTransaction = fTrue; - Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); + Call( ErrDIROpen( ppib, fcbRefRoot.get(), &pfucbRoot ) ); const PGNO pgnoLastToReclaimBelowEof = UlFunctionalMin( pgnoLastToReclaim, pgnoLastInitial ); if ( pgnoFirstToReclaim <= pgnoLastInitial ) { @@ -10382,6 +10421,8 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) pfucbRoot = pfucbNil; } + fcbRefRoot.reset(); + // WARNING: most (if not all) of the above is done without versioning, so there // really isn't any rollback of the update. if ( fInTransaction ) @@ -10861,6 +10902,7 @@ ERR ErrSPFreeFDP( ERR err; const IFMP ifmp = pfcbFDPToFree->Ifmp(); const PGNO pgnoFDPFree = pfcbFDPToFree->PgnoFDP(); + FCBRef fcbRefParent; FUCB *pfucbParent = pfucbNil; FUCB *pfucb = pfucbNil; CPG cpgRootCaptured = 0; @@ -10893,7 +10935,9 @@ ERR ErrSPFreeFDP( Assert( !FFMPIsTempDB( ifmp ) || pgnoSystemRoot == pgnoFDPParent ); - Call( ErrBTOpen( ppib, pgnoFDPParent, ifmp, &pfucbParent ) ); + OBJID objidParent = ( pgnoSystemRoot == pgnoFDPParent ? 
objidSystemRoot : objidNil ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPParent, objidParent, fcbRefParent ) ); + Call( ErrBTOpen( ppib, fcbRefParent.get(), &pfucbParent ) ); Assert( pfucbNil != pfucbParent ); Assert( pfucbParent->u.pfcb->FInitialized() ); @@ -10939,7 +10983,7 @@ ERR ErrSPFreeFDP( if ( !pfucb->u.pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfucb, fTrue ); + SPIInitFCB( pfucb->u.pfcb, pfucb->pcsrRoot ); } // We expect this to fail to find the FCB in the cache if we're deleting it. @@ -12075,6 +12119,7 @@ ERR ErrSPExtendDB( ERR err; FUCB *pfucbDbRoot = pfucbNil; FUCB *pfucbAE = pfucbNil; + FCBRef fcbRefSystemRoot; PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortSpace ); @@ -12082,7 +12127,8 @@ ERR ErrSPExtendDB( // open cursor on System / DB Root and RIW latch root page // - CallR( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbDbRoot ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); + CallR( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbDbRoot ) ); tcScope->nParentObjectClass = TceFromFUCB( pfucbDbRoot ); Assert( objidSystemRoot == ObjidFDP( pfucbDbRoot ) ); @@ -12241,6 +12287,7 @@ ERR ErrSPShrinkTruncateLastExtent( FUCB* pfucbRoot = pfucbNil; FUCB* pfucbOE = pfucbNil; FUCB* pfucbAE = pfucbNil; + FCBRef fcbRefSystemRoot; PIBTraceContextScope tcScope = ppib->InitTraceContextScope( ); tcScope->iorReason.SetIort( iortDbShrink ); tcScope->SetDwEngineObjid( objidSystemRoot ); @@ -12263,7 +12310,8 @@ ERR ErrSPShrinkTruncateLastExtent( fInTransaction = fTrue; // Open space trees. 
- Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); + Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); + Call( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbRoot ) ); Call( ErrSPIOpenOwnExt( pfucbRoot, &pfucbOE ) ); Call( ErrSPIOpenAvailExt( pfucbRoot, &pfucbAE ) ); @@ -13348,10 +13396,9 @@ ERR ErrSPReserveSPBufPages( FUCB* pfucbOwningTree = pfucbNil; // Open FUCB of the owning tree, just in case this is a space FUCB. - Call( ErrBTIOpenAndGotoRoot( + Call( ErrSPIOpenAndGotoRoot( pfucb->ppib, - pfucb->u.pfcb->PgnoFDP(), - pfucb->ifmp, + pfucb->u.pfcb, &pfucbOwningTree ) ); Assert( pfucbOwningTree->u.pfcb == pfucb->u.pfcb ); @@ -13417,6 +13464,7 @@ LOCAL ERR ErrSPIReserveSPBufPages( ERR err = JET_errSuccess; FMP* const pfmp = &g_rgfmp[ pfucb->ifmp ]; FCB* const pfcb = pfucb->u.pfcb; + FCBRef fcbRefParent; FUCB* pfucbParentLocal = pfucbParent; FUCB* pfucbOE = pfucbNil; FUCB* pfucbAE = pfucbNil; @@ -13432,17 +13480,10 @@ LOCAL ERR ErrSPIReserveSPBufPages( Assert( ( pgnoParentFDP != pgnoNull ) || ( pfucbParent == pfucbNil ) ); if ( ( pfucbParentLocal == pfucbNil ) && ( pgnoParentFDP != pgnoNull ) ) { - // Open cursor on parent FDP to get space from. Don't GotoRoot yet, we don't want to be latched - // while calling ErrSPIReserveSPBufPages. + // Open cursor on parent FDP to get space from. // - Call( ErrBTIOpen( - pfucb->ppib, - pfucb->ifmp, - pgnoParentFDP, - objidNil, - openNormal, - &pfucbParentLocal, - fFalse ) ); + Call( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoParentFDP, objidNil, fcbRefParent ) ); + Call( ErrBTOpen( pfucb->ppib, fcbRefParent.get(), &pfucbParentLocal ) ); Assert( pcsrNil == pfucbParentLocal->pcsrRoot ); } @@ -13875,20 +13916,13 @@ LOCAL ERR ErrSPIGetSe( AssertSPIPfucbOnRoot( pfucb ); { + FCBRef fcbRefParent; FUCB *pfucbParentLocal = pfucbNil; - // Open cursor on parent FDP to get space from. 
Don't GotoRoot yet, we don't want to be latched - // while calling ErrSPIReserveSPBufPages, but it can be a time savings to already have an FUCB - // that we can use for multiple calls. + // Open cursor on parent FDP to get space from. // - Call( ErrBTIOpen( - pfucb->ppib, - pfucb->ifmp, - pgnoParentFDP, - objidNil, - openNormal, - &pfucbParentLocal, - fFalse ) ); + Call( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoParentFDP, objidNil, fcbRefParent ) ); + Call( ErrBTOpen( pfucb->ppib, fcbRefParent.get(), &pfucbParentLocal ) ); Assert( pcsrNil == pfucbParentLocal->pcsrRoot ); CallJ( ErrSPIReserveSPBufPages( pfucb, pfucbParentLocal ), CloseParent ); @@ -15545,7 +15579,10 @@ ERR ErrSPGetInfo( Assert( !FSPReachablePages( fSPExtents ) ); if ( pfucbNil == pfucb ) { - err = ErrBTOpen( ppib, pgnoSystemRoot, ifmp, &pfucbT ); + FCBRef fcbRef; + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucbT) ); + Assert( fcbRef->FInitialized() ); // an initialized FCB isn't purged when fcbRef goes out of scope } else { @@ -15592,7 +15629,7 @@ ERR ErrSPGetInfo( { // UNDONE: Are there concurrency issues with updating the FCB // while we only have a read latch? 
- SPIInitFCB( pfucbT, fTrue ); + SPIInitFCB( pfucbT->u.pfcb, pfucbT->pcsrRoot ); if( !FSPIIsSmall( pfucbT->u.pfcb ) ) { BFPrereadPageRange( pfucbT->ifmp, pfucbT->u.pfcb->PgnoOE(), 2, NULL, NULL, bfprfDefault, ppib->BfpriPriority( pfucbT->ifmp ), *tcScope ); @@ -16304,7 +16341,10 @@ ERR ErrSPGetExtentInfo( if ( pfucbNil == pfucb ) { - err = ErrBTOpen( ppib, pgnoSystemRoot, ifmp, &pfucbT ); + FCBRef fcbRef; + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucbT ) ); + Assert( fcbRef->FInitialized() ); // an initialized FCB isn't purged when fcbRef goes out of scope } else { @@ -16326,7 +16366,7 @@ ERR ErrSPGetExtentInfo( { // UNDONE: Are there cuncurrency issues with updating the FCB // while we only have a read latch? - SPIInitFCB( pfucbT, fTrue ); + SPIInitFCB( pfucbT->u.pfcb, pfucbT->pcsrRoot ); if( !FSPIIsSmall( pfucbT->u.pfcb ) ) { BFPrereadPageRange( pfucbT->ifmp, pfucbT->u.pfcb->PgnoOE(), 2, bfprfDefault, ppib->BfpriPriority( pfucbT->ifmp ), *tcScope ); @@ -16715,6 +16755,7 @@ ERR ErrSPTrimRootAvail( CPG cpgAvailExtTotalSparseAfter = 0; FUCB *pfucbT = pfucbNil; FUCB *pfucbAE = pfucbNil; + FCBRef fcbRef; PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortSpace ); @@ -16723,14 +16764,15 @@ ERR ErrSPTrimRootAvail( memset( (void*)&spbufOnAE, 0, sizeof(spbufOnAE) ); - Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbT ) ); + CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); + Call( ErrSPIOpenAndGotoRoot( ppib, fcbRef.get(), &pfucbT)); AssertSPIPfucbOnRoot( pfucbT ); if ( !pfucbT->u.pfcb->FSpaceInitialized() ) { // UNDONE: Are there cuncurrency issues with updating the FCB // while we only have a read latch? 
- SPIInitFCB( pfucbT, fTrue ); + SPIInitFCB( pfucbT->u.pfcb, pfucbT->pcsrRoot ); if( !FSPIIsSmall( pfucbT->u.pfcb ) ) { BFPrereadPageRange( pfucbT->ifmp, pfucbT->u.pfcb->PgnoAE(), 2, bfprfDefault, ppib->BfpriPriority( pfucbT->ifmp ), *tcScope ); diff --git a/dev/ese/src/inc/_bt.hxx b/dev/ese/src/inc/_bt.hxx index 99427266..d9d56845 100644 --- a/dev/ese/src/inc/_bt.hxx +++ b/dev/ese/src/inc/_bt.hxx @@ -3,8 +3,6 @@ // used only space and BT // -ERR ErrBTIOpenAndGotoRoot( PIB *ppib, const PGNO pgnoFDP, const IFMP ifmp, FUCB **ppfucb ); - ERR ErrBTIIRefresh( FUCB *pfucb, LATCH latch ); // used by recovery diff --git a/dev/ese/src/inc/bt.hxx b/dev/ese/src/inc/bt.hxx index fb831eb2..253265b0 100644 --- a/dev/ese/src/inc/bt.hxx +++ b/dev/ese/src/inc/bt.hxx @@ -11,14 +11,6 @@ // -enum OPENTYPE -{ - openNormal, // normal open cursor (may be either unique or non-unique btree) - openNormalUnique, // normal open cursor (unique btree only) - openNormalNonUnique, // normal open cursor (non-unique btree only) - openNew // open cursor on newly-created FDP -}; - // ************************************** // open/close opearations // @@ -26,55 +18,6 @@ ERR ErrBTOpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb, BOOL fAllowReuse = fTrue ); ERR ErrBTOpenByProxy( PIB *ppib, FCB *pfcb, FUCB **ppfucb, const LEVEL level ); VOID BTClose( FUCB *pfucb ); -ERR ErrBTIOpen( - PIB *ppib, - const IFMP ifmp, - const PGNO pgnoFDP, - const OBJID objidFDP, - const OPENTYPE opentype, - FUCB **ppfucb, - BOOL fWillInitFCB ); - -INLINE ERR ErrBTOpen( - PIB *ppib, - const PGNO pgnoFDP, - const IFMP ifmp, - FUCB **ppfucb, - const OPENTYPE opentype = openNormal, - BOOL fWillInitFCB = fFalse ) -{ - Assert( openNormal == opentype || openNew == opentype ); - return ErrBTIOpen( - ppib, - ifmp, - pgnoFDP, - objidNil, - opentype, - ppfucb, - fWillInitFCB ); -} - -// open cursor, don't touch root page -INLINE ERR ErrBTOpenNoTouch( - PIB *ppib, - const IFMP ifmp, - const PGNO pgnoFDP, - const OBJID objidFDP, - const BOOL 
fUnique, - FUCB **ppfucb, - BOOL fWillInitFCB = fFalse ) -{ - Assert( objidNil != objidFDP ); - return ErrBTIOpen( - ppib, - ifmp, - pgnoFDP, - objidFDP, - fUnique ? openNormalUnique : openNormalNonUnique, - ppfucb, - fWillInitFCB ); -} - // ************************************** // retrieve/release operations diff --git a/dev/ese/src/inc/cat.hxx b/dev/ese/src/inc/cat.hxx index 385f38a9..3a7a4a2d 100644 --- a/dev/ese/src/inc/cat.hxx +++ b/dev/ese/src/inc/cat.hxx @@ -816,9 +816,9 @@ ERR ErrCATGetColumnCallbackInfo( ULONG * const pchDependantColumns ); -ERR ErrCATInitCatalogFCB( FUCB *pfucbTable ); -ERR ErrCATInitTempFCB( FUCB *pfucbTable ); -ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ); +ERR ErrCATInitCatalogFCB( PIB* ppib, FCB* pfcb ); +ERR ErrCATInitTempFCB( PIB* ppib, FCB* pfcb ); +ERR ErrCATInitFCB( PIB* ppib, FCB* pfcb, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ); enum CATCheckIndicesFlags : ULONG // catcif { diff --git a/dev/ese/src/inc/dir.hxx b/dev/ese/src/inc/dir.hxx index aa29dd2f..cc384285 100644 --- a/dev/ese/src/inc/dir.hxx +++ b/dev/ese/src/inc/dir.hxx @@ -120,8 +120,6 @@ ERR ErrDIRCreateDirectory( // ERR ErrDIROpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb ); ERR ErrDIROpenByProxy( PIB *ppib, FCB *pfcb, FUCB **ppfucb, LEVEL level ); -ERR ErrDIROpen( PIB *ppib, PGNO pgnoFDP, IFMP ifmp, FUCB **ppfucb, BOOL fWillInitFCB = fFalse ); -ERR ErrDIROpenNoTouch( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, BOOL fUnique, FUCB **ppfucb, BOOL fWillInitFCB = fFalse ); INLINE VOID DIRInitOpenedCursor( FUCB * const pfucb, const LEVEL level ) { FUCBSetLevelNavigate( pfucb, level ); diff --git a/dev/ese/src/inc/fcb.hxx b/dev/ese/src/inc/fcb.hxx index 8112f276..fa596bc8 100644 --- a/dev/ese/src/inc/fcb.hxx +++ b/dev/ese/src/inc/fcb.hxx @@ -883,13 +883,13 @@ private: BOOL FInitialIndex() const; VOID SetInitialIndex(); - BOOL FInitialized() const; + BOOL FInitialized() const volatile; private: VOID 
SetInitialized_(); VOID ResetInitialized_(); public: - BOOL FInitedForRecovery() const; + BOOL FInitedForRecovery() const volatile; VOID SetInitedForRecovery(); VOID ResetInitedForRecovery(); @@ -900,9 +900,9 @@ private: VOID SetVersioningOffForExtentPageCountCache(); VOID ResetVersioningOffForExtentPageCountCache(); - BOOL FDoingAdditionalInitializationDuringRecovery() const; - VOID SetDoingAdditionalInitializationDuringRecovery(); - VOID ResetDoingAdditionalInitializationDuringRecovery(); + VOID AcquireAdditionalInitDuringRecovery(); + VOID ReleaseAdditionalInitDuringRecovery(); + BOOL FDoingAdditionalInitializationDuringRecovery() const volatile; BOOL FInList() const; VOID SetInList(); @@ -932,7 +932,7 @@ private: VOID SetPreread(); VOID ResetPreread(); - BOOL FSpaceInitialized() const; + BOOL FSpaceInitialized() const volatile; VOID SetSpaceInitialized(); VOID ResetSpaceInitialized(); @@ -1023,7 +1023,7 @@ private: VOID ResetUpdatingAndLeaveDML(); VOID ResetUpdating(); - VOID IncrementRefCount(); + VOID IncrementRefCount( BOOL fOwnWriteLock = fFalse ); private: VOID ResetUpdating_(); @@ -1064,7 +1064,7 @@ private: // ===================================================================== // FCB creation/deletion. 
public: - static FCB *PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf = NULL, const BOOL fIncrementRefCount = fTrue, const BOOL fInitForRecovery = fFalse, OBJID* const pobjid = NULL ); + static FCB *PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf = NULL, const BOOL fIncrementRefCount = fTrue ); static ERR ErrCreate( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, FCB **ppfcb ); VOID CreateComplete_( ERR err, PCSTR szFile, const LONG lLine ); VOID PrepareForPurge( const BOOL fPrepareChildren = fTrue ); @@ -1079,7 +1079,7 @@ private: static BOOL FScanAndPurge_( _In_ INST * pinst, _In_ PIB * ppib, const BOOL fThreshold ); static BOOL FCloseToQuota_( INST * pinst ) { return pinst->m_cresFCB.FCloseToQuota(); }; static VOID PurgeObjects_( INST* const pinst, const IFMP ifmp, const PGNO pgnoFDP, const BOOL fTerminating ); - BOOL FCheckFreeAndPurge_( _In_ PIB *ppib, _In_ const BOOL fThreshold ); + BOOL FCheckFreeAndPurge_( _In_ const BOOL fThreshold ); VOID CloseAllCursorsOnFCB_( const BOOL fTerminating ); VOID Delete_( INST *pinst ); BOOL FHasCallbacks_( INST *pinst ); @@ -1140,7 +1140,7 @@ private: public: VOID InsertHashTable(); VOID DeleteHashTable(); - VOID Release(); + VOID Release( BOOL fPreventMoveToAvail = fFalse ); static BOOL FInHashTable( IFMP ifmp, PGNO pgnoFDB, FCB **ppfcb = NULL ); private: @@ -1449,7 +1449,7 @@ INLINE PGNO FCB::PgnoAE() const { return m_pgnoAE; } INLINE BFLatch* FCB::PBFLatchHintPgnoAE() { Assert( NULL == m_bflPgnoAE.pv ); return &m_bflPgnoAE; } INLINE IFMP FCB::Ifmp() const { return m_ifmp; } INLINE SHORT FCB::CbDensityFree() const { return m_spacehints.m_cbDensityFree; } -INLINE LONG FCB::WRefCount() const { return m_wRefCount; } +INLINE LONG FCB::WRefCount() const { return AtomicRead( const_cast( &m_wRefCount ) ); } // refcount is read without locks on multiple threads for synchronization INLINE RCE *FCB::PrceNewest() const { return m_prceNewest; } INLINE RCE *FCB::PrceOldest() const { return 
m_prceOldest; } INLINE USHORT FCB::CrefDomainDenyRead() const { return m_crefDomainDenyRead; } @@ -1586,7 +1586,7 @@ INLINE VOID FCB::SetDerivedIndex() { Assert( IsLocked() ); AtomicEx INLINE BOOL FCB::FInitialIndex() const { return !!(m_ulFCBFlags & mskFCBInitialIndex ); } INLINE VOID FCB::SetInitialIndex() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitialIndex ); } -INLINE BOOL FCB::FInitialized() const { return !!(m_ulFCBFlags & mskFCBInitialized ); } +INLINE BOOL FCB::FInitialized() const volatile { return !!(m_ulFCBFlags & mskFCBInitialized ); } INLINE VOID FCB::SetInitialized_() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitialized ); } INLINE VOID FCB::ResetInitialized_() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBInitialized ); } @@ -1594,13 +1594,11 @@ INLINE BOOL FCB::FVersioningOffForExtentPageCountCache() const { return !!(m_ulF INLINE VOID FCB::SetVersioningOffForExtentPageCountCache() { AtomicExchangeSet( &m_ulFCBFlags, mskFCBVersioningOff ); } INLINE VOID FCB::ResetVersioningOffForExtentPageCountCache() { AtomicExchangeReset( &m_ulFCBFlags, mskFCBVersioningOff ); } -INLINE BOOL FCB::FInitedForRecovery() const { return !!(m_ulFCBFlags & mskFCBInitedForRecovery ); } -INLINE VOID FCB::SetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitedForRecovery ); } -INLINE VOID FCB::ResetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBInitedForRecovery ); } +INLINE BOOL FCB::FInitedForRecovery() const volatile { return !!( m_ulFCBFlags & mskFCBInitedForRecovery ); } +INLINE VOID FCB::SetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitedForRecovery ); } +INLINE VOID FCB::ResetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBInitedForRecovery ); } -INLINE BOOL FCB::FDoingAdditionalInitializationDuringRecovery() const { return !!(m_ulFCBFlags & 
mskFCBDoingAdditionalInitializationDuringRecovery ); } -INLINE VOID FCB::SetDoingAdditionalInitializationDuringRecovery() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBDoingAdditionalInitializationDuringRecovery ); } -INLINE VOID FCB::ResetDoingAdditionalInitializationDuringRecovery() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBDoingAdditionalInitializationDuringRecovery ); } +INLINE BOOL FCB::FDoingAdditionalInitializationDuringRecovery() const volatile { return !!( m_ulFCBFlags & mskFCBDoingAdditionalInitializationDuringRecovery ); } INLINE BOOL FCB::FInList() const { return m_fFCBInList; } INLINE VOID FCB::SetInList() { m_fFCBInList = fTrue; } @@ -1630,9 +1628,9 @@ INLINE BOOL FCB::FPreread() const { return !!(m_ulFCBFlags & mskFC INLINE VOID FCB::SetPreread() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBPreread ); } INLINE VOID FCB::ResetPreread() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBPreread ); } -INLINE BOOL FCB::FSpaceInitialized() const { return !!(m_ulFCBFlags & mskFCBSpaceInitialized ); } -INLINE VOID FCB::SetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBSpaceInitialized ); } -INLINE VOID FCB::ResetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBSpaceInitialized ); } +INLINE BOOL FCB::FSpaceInitialized() const volatile { return !!(m_ulFCBFlags & mskFCBSpaceInitialized ); } +INLINE VOID FCB::SetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBSpaceInitialized ); } +INLINE VOID FCB::ResetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBSpaceInitialized ); } INLINE BOOL FCB::FTryPurgeOnClose() const { return !!(m_ulFCBFlags & mskFCBTryPurgeOnClose ); } INLINE VOID FCB::SetTryPurgeOnClose() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBTryPurgeOnClose ); } @@ -1945,7 +1943,7 @@ INLINE VOID FCB::AssertDDL() // 
========================================================================= // Hashing. -INLINE VOID FCB::Release() +INLINE VOID FCB::Release( BOOL fPreventMoveToAvail /* = fFalse */ ) { #ifdef DEBUG FCB *pfcbT; @@ -1966,7 +1964,7 @@ INLINE VOID FCB::Release() Assert( pfcbT == this || ( FDeleteCommitted() && ObjidFDP() != pfcbT->ObjidFDP() ) ); } #endif // DEBUG - DecrementRefCountAndUnlink_( pfucbNil, fTrue ); + DecrementRefCountAndUnlink_( pfucbNil, fTrue, fPreventMoveToAvail ); } INLINE BOOL FCB::FInHashTable( IFMP ifmp, PGNO pgnoFDP, FCB **ppfcb ) diff --git a/dev/ese/src/inc/file.hxx b/dev/ese/src/inc/file.hxx index e7a1d5ef..670ebcf0 100644 --- a/dev/ese/src/inc/file.hxx +++ b/dev/ese/src/inc/file.hxx @@ -64,11 +64,39 @@ INLINE VOID FILEFreeDefaultRecord( FUCB *pfucbFake ) VOID FILETableMustRollback( PIB *ppib, FCB *pfcbTable ); +// Guard object to help with FCB lifetime. +// Uses unique_ptr with a custom deleter to decrement FCB refcount when the FCB reference goes out of scope. +// Purges (deletes) semi-intialized FCBs. Required because an FCB is only added to the inst's FCB list +// after being fully initialized. +class FCBRefDeleter +{ +public: + void operator() ( FCB* pfcb ); +}; + +// A reference to an FCB. Works with different FCB lifetime states and refcount to ensure proper access to an FCB. 
+using FCBRef = std::unique_ptr; + +enum OPENTYPE +{ + openNormal, // normal open cursor (may be either unique or non-unique btree) + openNormalNoTouch, // normal open cursor, doesn't read pgnoFDP + openNew // open cursor on newly-created FDP +}; + +ERR ErrFILEIOpenFCB( + PIB *ppib, + IFMP ifmp, + PGNO pgnoFDP, + OBJID objidFDP, + OPENTYPE opentype, + FCBRef& fcbRef ); + ERR ErrFILEIInitializeFCB( PIB *ppib, IFMP ifmp, TDB *ptdb, - FCB *ppfcbNew, + FCB *pfcbNew, IDB *pidb, BOOL fPrimary, PGNO pgnoFDP, @@ -79,7 +107,9 @@ VOID FILESetAllIndexMask( FCB *pfcbTable ); ERR ErrFILEDeleteTable( PIB *ppib, IFMP ifmp, const CHAR *szTable, const BOOL fAllowTableDeleteSensitive = fFalse, const JET_GRBIT grbit = NO_GRBIT ); FIELD *PfieldFCBFromColumnName( FCB *pfcb, _In_ PCSTR szColumnName ); - + +ERR ErrFILEFcbGet( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ); +ERR ErrFILEFcbGetNoTouch( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ); FCB *PfcbFCBFromIndexName( FCB *pfcbTable, _In_ PCSTR szName ); struct FDPINFO @@ -278,3 +308,34 @@ INLINE VOID FILEReleaseCurrentSecondary( FUCB *pfucb ) } } +INLINE void FCBRefDeleter::operator()( FCB* pfcb ) +{ + if ( pfcb != pfcbNil ) + { + if ( pfcb->FInitialized() ) + { + pfcb->Release( !pfcb->FTypeTable() /* fPreventMoveToAvail */ ); //only table FCBs may move to avail list + } + else + { + // we own the FCB bcause we are in the create path (we're closing because the FCB was created + // but not fully initialized, or because an error + // occurred during FILEOpenTable()) + +#ifdef DEBUG + pfcb->Lock(); + pfcb->FucbList().LockForEnumeration(); + Assert( pfcb->FucbList().Count() == 0 ); + pfcb->FucbList().UnlockForEnumeration(); + pfcb->Unlock(); +#endif + + // synchronously purge the FCB + Assert( pfcb->WRefCount() == 1 ); // we should be the only reference + pfcb->Release( fTrue /* fPreventMoveToAvail */ ); + pfcb->PrepareForPurge(); + pfcb->Purge(); + } + } +} + diff --git 
a/dev/ese/src/inc/fucb.hxx b/dev/ese/src/inc/fucb.hxx index ecf3523a..a2f25c4f 100644 --- a/dev/ese/src/inc/fucb.hxx +++ b/dev/ese/src/inc/fucb.hxx @@ -519,6 +519,7 @@ INLINE BOOL FFUCBSpace( const FUCB *pfucb ) INLINE BOOL FFUCBUnique( const FUCB *pfucb ) { Assert( pfcbNil != pfucb->u.pfcb ); + Assert( pfucb->u.pfcb->FInitialized() ); // unique flag is only valid after full FCB initialization const BOOL fUnique = ( pfucb->u.pfcb->FUnique() || FFUCBSpace( pfucb ) ); diff --git a/dev/ese/src/inc/space.hxx b/dev/ese/src/inc/space.hxx index 04136208..fc7e1794 100644 --- a/dev/ese/src/inc/space.hxx +++ b/dev/ese/src/inc/space.hxx @@ -146,7 +146,7 @@ VOID SPTerm(); // initialize Root page // returns pfucb placed on Root // -ERR ErrSPInitFCB( _Inout_ FUCB * const pfucb ); +ERR ErrSPInitFCB( _In_ PIB * ppib, _Inout_ FCB * const pfcb ); ERR ErrSPDeferredInitFCB( _Inout_ FUCB * const pfucb ); ERR ErrSPGetLastPgno( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ PGNO * ppgno ); ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTINFO * pextinfo ); From 5d2a38d2b6983f7eb5ca063b3653bf735c1a9410 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Fri, 4 Nov 2022 08:48:17 +0000 Subject: [PATCH 084/102] Fix eacr violation [Substrate:420b32c1b9212c12f789ac2943a227dbe9eaafe6] --- dev/ese/src/os/encrypt2.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/ese/src/os/encrypt2.cxx b/dev/ese/src/os/encrypt2.cxx index ad7e02b1..514d7fb6 100644 --- a/dev/ese/src/os/encrypt2.cxx +++ b/dev/ese/src/os/encrypt2.cxx @@ -61,7 +61,7 @@ ErrOSIBCryptAESProviderInit() } if ( cbData != sizeof( g_cbKeyObject ) ) { - Error( JET_errInvalidParameter ); + Error( ErrERRCheck( JET_errInvalidParameter ) ); } ULONG cbBlockSize = 0; @@ -78,7 +78,7 @@ ErrOSIBCryptAESProviderInit() Assert( cbBlockSize == BLOCK_SIZE_AES256 ); if ( cbData != sizeof( cbBlockSize ) || cbBlockSize != BLOCK_SIZE_AES256 ) { - Error( JET_errInvalidParameter ); + Error( ErrERRCheck( 
JET_errInvalidParameter ) ); } HandleError: From a3c051d4e877a09b181cebd14d79f36005bcc51c Mon Sep 17 00:00:00 2001 From: Build Team Date: Fri, 4 Nov 2022 23:25:26 +0000 Subject: [PATCH 085/102] This is a fast revert due to inner ring stability issue. [Substrate:3755b45d2ed0e28602144622fef57629774cf110] --- dev/ese/src/ese/_log/logredo.cxx | 2 +- dev/ese/src/ese/bt.cxx | 275 ++++++++++++++++++++++++++++++- dev/ese/src/ese/cat.cxx | 137 ++++++++------- dev/ese/src/ese/dbshrink.cxx | 19 ++- dev/ese/src/ese/dbtask.cxx | 4 +- dev/ese/src/ese/dbutil.cxx | 66 ++++---- dev/ese/src/ese/dir.cxx | 57 +++++++ dev/ese/src/ese/esedump.cxx | 12 +- dev/ese/src/ese/fcb.cxx | 109 ++++++------ dev/ese/src/ese/fcreate.cxx | 137 +++++++-------- dev/ese/src/ese/fileopen.cxx | 252 +--------------------------- dev/ese/src/ese/lv.cxx | 9 +- dev/ese/src/ese/repair.cxx | 41 ++--- dev/ese/src/ese/space.cxx | 226 +++++++++++-------------- dev/ese/src/inc/_bt.hxx | 2 + dev/ese/src/inc/bt.hxx | 57 +++++++ dev/ese/src/inc/cat.hxx | 6 +- dev/ese/src/inc/dir.hxx | 2 + dev/ese/src/inc/fcb.hxx | 44 ++--- dev/ese/src/inc/file.hxx | 65 +------- dev/ese/src/inc/fucb.hxx | 1 - dev/ese/src/inc/space.hxx | 2 +- 22 files changed, 786 insertions(+), 739 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index fede9c3d..c8f06f4a 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -1005,7 +1005,7 @@ LOCAL ERR ErrLGRICreateFucb( // get fcb for table, if one exists // - pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* FIncrementRefCount */ ); + pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* FIncrementRefCount */, fTrue /* fInitForRecovery */); Assert( pfcbNil == pfcb || ( fcbsf & fcbsfInitialized ) ); if ( pfcbNil == pfcb ) { diff --git a/dev/ese/src/ese/bt.cxx b/dev/ese/src/ese/bt.cxx index f78ea6ee..2abe6172 100644 --- a/dev/ese/src/ese/bt.cxx +++ b/dev/ese/src/ese/bt.cxx @@ -499,6 +499,7 @@ ERR ErrBTOpen( PIB *ppib, 
FCB *pfcb, FUCB **ppfucb, BOOL fAllowReuse ) FUCB *pfucb; Assert( pfcb != pfcbNil ); + Assert( pfcb->FInitialized() ); // In most cases, we should reuse a deferred-closed FUCB. The one // time we don't want to is if we're opening a space cursor. @@ -788,7 +789,24 @@ VOID BTClose( FUCB *pfucb ) pfcb->ResetDomainDenyWrite(); } - if ( pfcb->FTypeTable() ) + if ( !pfcb->FInitialized() ) + { + + // we own the FCB (we're closing because the FCB was created during + // a DIROpen() of a DIRCreateDirectory() or because an error + // occurred during FILEOpenTable()) + + // unlink the FUCB from the FCB without moving the FCB to the + // avail LRU list (this prevents the FCB from being purged) + + pfucb->u.pfcb->Unlink( pfucb, fTrue ); + + // synchronously purge the FCB + + pfcb->PrepareForPurge(); + pfcb->Purge(); + } + else if ( pfcb->FTypeTable() ) { // only table FCBs can be moved to the avail-LRU list @@ -7421,7 +7439,6 @@ ERR ErrBTFindFragmentedRange( ERR ErrBTDumpPageUsage( PIB * ppib, const IFMP ifmp, const PGNO pgnoFDP ) { ERR err = JET_errSuccess; - FCBRef fcbRef; FUCB * pfucb = pfucbNil; DIB dib; CSR * pcsr; @@ -7447,8 +7464,8 @@ ERR ErrBTDumpPageUsage( PIB * ppib, const IFMP ifmp, const PGNO pgnoFDP ) if ( pgnoNull != pgnoFDP ) { - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidNil, fcbRef ) ); - Call( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); + + Call( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); FUCBSetIndex( pfucb ); // we will be traversing the entire tree in order, preread all the pages @@ -7609,6 +7626,229 @@ ERR ErrBTDumpPageUsage( PIB * ppib, const IFMP ifmp, const PGNO pgnoFDP ) +// ****************************************************** +// SPECIAL OPERATIONS +// + + +INLINE ERR ErrBTICreateFCB( + PIB *ppib, + const IFMP ifmp, + const PGNO pgnoFDP, + const OBJID objidFDP, + const OPENTYPE opentype, + FUCB **ppfucb ) +{ + ERR err; + FCB *pfcb = pfcbNil; + FUCB *pfucb = pfucbNil; + + // create a new FCB + + CallR( FCB::ErrCreate( ppib, ifmp, pgnoFDP, &pfcb ) 
); + + // the creation was successful + + Assert( pfcb->IsLocked() ); + Assert( pfcb->FTypeNull() ); // No fcbtype yet. + Assert( pfcb->Ifmp() == ifmp ); + Assert( pfcb->PgnoFDP() == pgnoFDP ); + Assert( !pfcb->FInitialized() ); + Assert( pfcb->WRefCount() == 0 ); + pfcb->Unlock(); + + Call( ErrFUCBOpen( ppib, ifmp, &pfucb ) ); + Call( pfcb->ErrLink( pfucb ) ); + + Assert( !pfcb->FSpaceInitialized() ); + Assert( openNew != opentype || objidNil == objidFDP ); + if ( openNew != opentype ) + { + if ( objidNil == objidFDP ) + { + Assert( openNormal == opentype ); + + // read space info into FCB cache, including objid + Call( ErrSPInitFCB( pfucb ) ); + Assert( g_fRepair || pfcb->FSpaceInitialized() ); + } + else + { + pfcb->SetObjidFDP( objidFDP ); + if ( openNormalNonUnique == opentype ) + { + pfcb->Lock(); + pfcb->SetNonUnique(); + pfcb->Unlock(); + } + else + { + Assert( pfcb->FUnique() ); // btree is initially assumed to be unique + Assert( openNormalUnique == opentype ); + } + Assert( !pfcb->FSpaceInitialized() ); + } + } + + if ( pgnoFDP == pgnoSystemRoot ) + { + // SPECIAL CASE: For database cursor, we've got all the + // information we need. 
+ + // when opening db cursor, always force to check the root page + Assert( objidNil == objidFDP ); + if ( openNew == opentype ) + { + // objid will be set when we return to ErrSPCreate() + Assert( objidNil == pfcb->ObjidFDP() ); + } + else + { + Assert( objidSystemRoot == pfcb->ObjidFDP() ); + } + + // insert this FCB into the global list + + pfcb->InsertList(); + + // finish initializing this FCB + + pfcb->Lock(); + Assert( pfcb->FTypeNull() ); + pfcb->SetTypeDatabase(); + pfcb->CreateComplete(); + pfcb->Unlock(); + } + + *ppfucb = pfucb; + Assert( !Pcsr( pfucb )->FLatched() ); + + return err; + +HandleError: + Assert( pfcbNil != pfcb ); + Assert( !pfcb->FInitialized() ); + Assert( !pfcb->FInList() ); + Assert( !pfcb->FInLRU() ); + Assert( ptdbNil == pfcb->Ptdb() ); + Assert( pfcbNil == pfcb->PfcbNextIndex() ); + Assert( pidbNil == pfcb->Pidb() ); + + if ( pfucbNil != pfucb ) + { + if ( pfcbNil != pfucb->u.pfcb ) + { + Assert( pfcb == pfucb->u.pfcb ); + // We managed to link the FUCB to the FCB before we errored. + pfcb->Unlink( pfucb, fTrue ); + } + + // close the FUCB + FUCBClose( pfucb ); + } + + // synchronously purge the FCB + pfcb->PrepareForPurge( fFalse ); + pfcb->Purge( fFalse ); + + return err; +} + + +// ***************************************************** +// BTREE INTERNAL ROUTINES +// + +// opens a cursor on a tree rooted at pgnoFDP +// open cursor on corresponding FCB if it is in cache [common case] +// if FCB not in cache, create one, link with cursor +// and initialize FCB space info +// if fNew is set, this is a new tree, +// so do not initialize FCB space info +// fWillInitFCB: On a passive, is the caller planning to fully hydrate the placeholder FCB? 
+// +ERR ErrBTIOpen( + PIB *ppib, + const IFMP ifmp, + const PGNO pgnoFDP, + const OBJID objidFDP, + const OPENTYPE opentype, + FUCB **ppfucb, + BOOL fWillInitFCB ) +{ + ERR err; + FCB *pfcb; + FCBStateFlags fcbsf; + ULONG cRetries = 0; + PIBTraceContextScope tcScope = ppib->InitTraceContextScope( ); + tcScope->iorReason.SetIors( iorsBTOpen ); + +RetrieveFCB: + AssertTrack( cRetries != 100000, "TooManyFcbOpenRetries" ); + + // get the FCB for the given ifmp/pgnoFDP + + pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue, !fWillInitFCB ); + if ( pfcb == pfcbNil ) + { + + // the FCB does not exist + + Assert( fcbsfNone == fcbsf ); + + // try to create a new B-tree which will cause the creation of the new FCB + + err = ErrBTICreateFCB( ppib, ifmp, pgnoFDP, objidFDP, opentype, ppfucb ); + Assert( err <= JET_errSuccess ); // Shouldn't return warnings. + + if ( err == errFCBExists ) + { + + // we failed because someone else was racing to create + // the same FCB that we want, but they beat us to it + + // try to get the FCB again + + UtilSleep( 10 ); + cRetries++; + goto RetrieveFCB; + } + Call( err ); + + tcScope->nParentObjectClass = TceFromFUCB( *ppfucb ); + } + else + { + tcScope->nParentObjectClass = pfcb->TCE(); + + if ( fcbsf & fcbsfInitialized ) + { + Assert( pfcb->WRefCount() >= 1); + err = ErrBTOpen( ppib, pfcb, ppfucb ); + + // Cursor has been opened on FCB, so refcount should be + // at least 2 (one for cursor, one for call to PfcbFCBGet()). + // (if ErrBTOpen returns w/o error) + Assert( pfcb->WRefCount() > 1 || (1 == pfcb->WRefCount() && err < JET_errSuccess) ); + + pfcb->Release(); + } + else + { + FireWall( "DeprecatedSentinelFcbBtOpen" ); // Sentinel FCBs are believed deprecated + Assert( !FFMPIsTempDB( ifmp ) ); // Sentinels not used by sort/temp. tables. + + // If we encounter a sentinel, it means the + // table has been locked for subsequent deletion. 
+ err = ErrERRCheck( JET_errTableLocked ); + } + } + +HandleError: + return err; +} + + // ************************************************* // movement operations // @@ -7665,6 +7905,33 @@ ERR ErrBTIGotoRoot( FUCB *pfucb, LATCH latch ) return JET_errSuccess; } +ERR ErrBTIOpenAndGotoRoot( PIB *ppib, const PGNO pgnoFDP, const IFMP ifmp, FUCB **ppfucb ) +{ + ERR err; + FUCB *pfucb; + + CallR( ErrBTIOpen( ppib, ifmp, pgnoFDP, objidNil, openNormal, &pfucb, fFalse ) ); + Assert( pfucbNil != pfucb ); + Assert( pfcbNil != pfucb->u.pfcb ); + Assert( pfucb->u.pfcb->FInitialized() ); + + err = ErrBTIGotoRoot( pfucb, latchRIW ); + if ( err < JET_errSuccess ) + { + BTClose( pfucb ); + } + else + { + Assert( latchRIW == Pcsr( pfucb )->Latch() ); + Assert( pcsrNil == pfucb->pcsrRoot ); + pfucb->pcsrRoot = Pcsr( pfucb ); + + *ppfucb = pfucb; + } + + return err; +} + // this is the uncommon case in the refresh logic // where we lost physical currency on page // diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index d8ca9491..91748af0 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -457,18 +457,17 @@ INLINE ERR ErrCATICreateCatalogIndexes( FUCB *pfucbTableExtent; PGNO pgnoIndexFDP; FCB *pfcb = pfcbNil; - FCBRef fcbRef; // don't maintain secondary indexes on the shadow catalog. // Open cursor for space navigation - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoFDPMSO, objidFDPMSO, fcbRef ) ); - CallR( ErrDIROpen( ppib, fcbRef.get(), &pfucbTableExtent ) ); + CallR( ErrDIROpen( ppib, pgnoFDPMSO, ifmp, &pfucbTableExtent ) ); pfcb = pfucbTableExtent->u.pfcb; Assert( pfucbTableExtent != pfucbNil ); Assert( !FFUCBVersioned( pfucbTableExtent ) ); // Verify won't be deferred closed. 
+ Assert( pfcb != pfcbNil ); Assert( !pfcb->FInitialized() ); Assert( pfcb->Pidb() == pidbNil ); @@ -498,9 +497,9 @@ INLINE ERR ErrCATICreateCatalogIndexes( HandleError: Assert( pfcb->FInitialized() ); - Assert( pfcb->WRefCount() == 2 ); // 1 for fcbRef, 1 for pfucbTableExtent + Assert( pfcb->WRefCount() == 1 ); - // force the FCB to be uninitialized so it will be purged by FCBRef deleter + // force the FCB to be uninitialized so it will be purged by DIRClose pfcb->Lock(); pfcb->CreateCompleteErr( errFCBUnusable ); @@ -2248,7 +2247,6 @@ ERR ErrREPAIRCATCreate( ERR ErrCATCreate( PIB *ppib, const IFMP ifmp, const BOOL fReplayCreateDbImplicitly ) { ERR err; - FCBRef fcbRef; FUCB *pfucb = pfucbNil; PGNO pgnoFDP; PGNO pgnoFDPShadow; @@ -2276,8 +2274,7 @@ ERR ErrCATCreate( PIB *ppib, const IFMP ifmp, const BOOL fReplayCreateDbImplicit // allocate cursor // - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - CallR( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + CallR( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); Assert( pfucbNil != pfucb ); Assert( cpgMSOInitial > cpgTableMin ); Call( ErrDIRCreateDirectory( @@ -2321,8 +2318,6 @@ ERR ErrCATCreate( PIB *ppib, const IFMP ifmp, const BOOL fReplayCreateDbImplicit DIRClose( pfucb ); } - fcbRef.reset(); // do we need to release the FCB before trx rollback? 
- if( err < 0 ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); @@ -5456,10 +5451,12 @@ LOCAL VOID CATIFreeSecondaryIndexes( FCB *pfcbSecondaryIndexes ) /* get index info of a system table index /**/ -ERR ErrCATInitCatalogFCB( PIB* ppib, FCB* pfcb ) +ERR ErrCATInitCatalogFCB( FUCB *pfucbTable ) { ERR err; - const IFMP ifmp = pfcb->Ifmp(); + PIB *ppib = pfucbTable->ppib; + const IFMP ifmp = pfucbTable->ifmp; + FCB *pfcb = pfucbTable->u.pfcb; TDB *ptdb = ptdbNil; IDB idb( PinstFromIfmp( ifmp ) ); UINT iIndex; @@ -5535,7 +5532,7 @@ ERR ErrCATInitCatalogFCB( PIB* ppib, FCB* pfcb ) } else if ( !fShadow ) { - FCBRef fcbRefSecondaryIndex; + FUCB *pfucbSecondaryIndex; PGNO pgnoIndexFDP; OBJID objidIndexFDP; @@ -5561,33 +5558,46 @@ ERR ErrCATInitCatalogFCB( PIB* ppib, FCB* pfcb ) } Assert( idb.FUnique() ); // all catalog indexes are unique - note redundant w/ above assert. + Call( ErrDIROpenNoTouch( + ppib, + ifmp, + pgnoIndexFDP, + objidIndexFDP, + fTrue, // all catalog indexes are unique + &pfucbSecondaryIndex, + fTrue ) ); // will initialize FCB - Call( ErrFILEFcbGetNoTouch( ppib, ifmp, pgnoIndexFDP, objidIndexFDP, fcbRefSecondaryIndex ) ); + Assert( !pfucbSecondaryIndex->u.pfcb->FInitialized() || pfucbSecondaryIndex->u.pfcb->FInitedForRecovery() ); - Assert( !fcbRefSecondaryIndex->FInitialized() || fcbRefSecondaryIndex->FInitedForRecovery() ); - Call( ErrFILEIInitializeFCB( + err = ErrFILEIInitializeFCB( ppib, ifmp, ptdb, - fcbRefSecondaryIndex.get(), + pfucbSecondaryIndex->u.pfcb, &idb, fFalse, pgnoIndexFDP, PSystemSpaceHints(eJSPHDefaultUserTable), - NULL ) ); + NULL ); + if ( err < 0 ) + { + DIRClose( pfucbSecondaryIndex ); + goto HandleError; + } - fcbRefSecondaryIndex->SetPfcbNextIndex( pfcbSecondaryIndexes ); - pfcbSecondaryIndexes = fcbRefSecondaryIndex.get(); + pfucbSecondaryIndex->u.pfcb->SetPfcbNextIndex( pfcbSecondaryIndexes ); + pfcbSecondaryIndexes = pfucbSecondaryIndex->u.pfcb; - Assert( !fcbRefSecondaryIndex->FInList() ); + Assert( 
!pfucbSecondaryIndex->u.pfcb->FInList() ); - // mark the secondary index fcb as being initialized successfully - // this protects it from being purged when fcbRef goes out of scope + // mark the secondary index as being initialized successfully - fcbRefSecondaryIndex->Lock(); - fcbRefSecondaryIndex->CreateComplete(); - fcbRefSecondaryIndex->ResetInitedForRecovery(); - fcbRefSecondaryIndex->Unlock(); + pfucbSecondaryIndex->u.pfcb->Lock(); + pfucbSecondaryIndex->u.pfcb->CreateComplete(); + pfucbSecondaryIndex->u.pfcb->ResetInitedForRecovery(); + pfucbSecondaryIndex->u.pfcb->Unlock(); + + DIRClose( pfucbSecondaryIndex ); } } @@ -7721,7 +7731,7 @@ LOCAL ERR ErrCATIInitIndexFCBs( } else { - FCBRef fcbRefSecondaryIndex; + FUCB *pfucbSecondaryIndex; Assert( pgnoIndexFDP != pfcb->PgnoFDP() || g_fRepair ); @@ -7733,36 +7743,48 @@ LOCAL ERR ErrCATIInitIndexFCBs( *pfSecondaryPgnoFDPLastSetRequired = true; } - Call( ErrFILEFcbGetNoTouch( ppib, ifmp, pgnoIndexFDP, objidIndexFDP, fcbRefSecondaryIndex ) ); - - Assert( !fcbRefSecondaryIndex->FInitialized() || fcbRefSecondaryIndex->FInitedForRecovery() ); - Call( ErrFILEIInitializeFCB( + Call( ErrDIROpenNoTouch( + ppib, + ifmp, + pgnoIndexFDP, + objidIndexFDP, + idb.FUnique(), + &pfucbSecondaryIndex, + fTrue ) ); // Will initialize FCB + Assert( !pfucbSecondaryIndex->u.pfcb->FInitialized() || pfucbSecondaryIndex->u.pfcb->FInitedForRecovery() ); + + err = ErrFILEIInitializeFCB( ppib, ifmp, ptdb, - fcbRefSecondaryIndex.get(), + pfucbSecondaryIndex->u.pfcb, &idb, fFalse, pgnoIndexFDP, &jsph, - pfcbTemplate ) ); + pfcbTemplate ); + if ( err < 0 ) + { + DIRClose( pfucbSecondaryIndex ); + goto HandleError; + } + Assert( pfucbSecondaryIndex->u.pfcb->ObjidFDP() == objidIndexFDP ); - Assert( fcbRefSecondaryIndex->ObjidFDP() == objidIndexFDP ); + pfucbSecondaryIndex->u.pfcb->SetFileTimePgnoFDPLastSet( ftPgnoFDPLastSet ); + pfucbSecondaryIndex->u.pfcb->SetPfcbNextIndex( pfcbSecondaryIndexes ); + pfcbSecondaryIndexes = 
pfucbSecondaryIndex->u.pfcb; - fcbRefSecondaryIndex->SetFileTimePgnoFDPLastSet( ftPgnoFDPLastSet ); - fcbRefSecondaryIndex->SetPfcbNextIndex( pfcbSecondaryIndexes ); - pfcbSecondaryIndexes = fcbRefSecondaryIndex.get(); + Assert( !pfucbSecondaryIndex->u.pfcb->FInList() ); - Assert( !fcbRefSecondaryIndex->FInList() ); + // mark the secondary index as being initialized successfully - // mark the secondary index fcb as being initialized successfully - // this protects it from being purged when fcbRef goes out of scope + pfucbSecondaryIndex->u.pfcb->Lock(); + pfucbSecondaryIndex->u.pfcb->SetInitialIndex(); + pfucbSecondaryIndex->u.pfcb->CreateComplete(); + pfucbSecondaryIndex->u.pfcb->ResetInitedForRecovery(); + pfucbSecondaryIndex->u.pfcb->Unlock(); - fcbRefSecondaryIndex->Lock(); - fcbRefSecondaryIndex->SetInitialIndex(); - fcbRefSecondaryIndex->CreateComplete(); - fcbRefSecondaryIndex->ResetInitedForRecovery(); - fcbRefSecondaryIndex->Unlock(); + DIRClose( pfucbSecondaryIndex ); } Assert( locOnCurBM == pfucbCatalog->locLogical ); @@ -8177,12 +8199,14 @@ LOCAL ERR ErrCATIInitCallbacks( return err; } -ERR ErrCATInitFCB( PIB* ppib, FCB* pfcb, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ) +ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ) { ERR err; + PIB *ppib = pfucbTable->ppib; INST *pinst = PinstFromPpib( ppib ); - const IFMP ifmp = pfcb->Ifmp(); + const IFMP ifmp = pfucbTable->ifmp; FUCB *pfucbCatalog = pfucbNil; + FCB *pfcb = pfucbTable->u.pfcb; TDB *ptdb = ptdbNil; FCB *pfcbTemplateTable = pfcbNil; DATA dataField; @@ -8208,8 +8232,8 @@ ERR ErrCATInitFCB( PIB* ppib, FCB* pfcb, OBJID objidTable, const BOOL fSkipPgnoF } Assert( !pfcb->FInitialized() || pfcb->FInitedForRecovery() ); - Assert( objidTable == pfcb->ObjidFDP() - || objidNil ==pfcb->ObjidFDP() && g_fRepair ); + Assert( objidTable == pfucbTable->u.pfcb->ObjidFDP() + || objidNil == pfucbTable->u.pfcb->ObjidFDP() && g_fRepair ); CallR( ErrCATOpen( ppib, ifmp, 
&pfucbCatalog ) ); Assert( pfucbNil != pfucbCatalog ); @@ -8626,9 +8650,11 @@ ERR ErrCATInitFCB( PIB* ppib, FCB* pfcb, OBJID objidTable, const BOOL fSkipPgnoF -ERR ErrCATInitTempFCB( PIB* ppib, FCB* pfcb ) +ERR ErrCATInitTempFCB( FUCB *pfucbTable ) { ERR err; + PIB *ppib = pfucbTable->ppib; + FCB *pfcb = pfucbTable->u.pfcb; TDB *ptdb = ptdbNil; TCIB tcib; INST *pinst = PinstFromPpib( ppib ); @@ -8639,7 +8665,7 @@ ERR ErrCATInitTempFCB( PIB* ppib, FCB* pfcb ) /* created, in which case there are no primary or secondary indexes yet. /**/ - CallR( ErrTDBCreate( pinst, pfcb->Ifmp(), &ptdb, &tcib ) ); + CallR( ErrTDBCreate( pinst, pfucbTable->ifmp, &ptdb, &tcib ) ); ptdb->SetLVChunkMost( (LONG)UlParam( JET_paramLVChunkSizeMost ) ); @@ -15740,7 +15766,6 @@ ERR ErrCATGetCursorsFromObjid( ERR err = JET_errSuccess; PGNO pgnoFDPParent = pgnoNull; PGNO pgnoFDP = pgnoNull; - FCBRef fcbRef; FUCB* pfucb = pfucbNil; FUCB* pfucbParent = pfucbNil; @@ -15750,8 +15775,7 @@ ERR ErrCATGetCursorsFromObjid( Assert( objidParent == objidNil ); Assert( sysobj == sysobjNil ); pgnoFDP = pgnoSystemRoot; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); pgnoFDPParent = pgnoNull; pfucbParent = pfucbNil; } @@ -15779,8 +15803,7 @@ ERR ErrCATGetCursorsFromObjid( if ( sysobj == sysobjTable ) { pgnoFDPParent = pgnoSystemRoot; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPParent, objidSystemRoot, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucbParent ) ); + Call( ErrDIROpen( ppib, pgnoFDPParent, ifmp, &pfucbParent ) ); pgnoFDP = pgnoFDPTable; pfucb = pfucbTable; diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index 3e873ccd..e6741811 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -1976,12 +1976,10 @@ LOCAL ERR ErrSHKIRootMoveCheck( const ROOTMOVE& rm, FUCB* const pfucb, const OBJ } // Get child's 
pgnoFDP. - FCBRef fcbRefChild; PGNO pgnoFDPChild = pgnoNull; Call( ErrCATSeekObjectByObjid( ppib, ifmp, objidTable, sysobjChild, objidChild, NULL, 0, &pgnoFDPChild ) ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPChild, objidChild, fcbRefChild ) ); - Call( ErrBTOpen( ppib, fcbRefChild.get(), &pfucbChild ) ); + Call( ErrBTIOpen( ppib, ifmp, pgnoFDPChild, objidNil, openNormal, &pfucbChild, fFalse ) ); Call( ErrBTIGotoRoot( pfucbChild, latchRIW ) ); pfucbChild->pcsrRoot = Pcsr( pfucbChild ); @@ -2154,17 +2152,20 @@ ERR ErrSHKRootPageMove( // initialize helper variables. // - // Retrieve some metadata first by opening an FCB. - { - FCBRef fcbRef; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidNil, fcbRef ) ); + // Retrieve some metadata first by opening it at the BT level. + Call( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); + pfcb = pfucb->u.pfcb; // Determine whether or not this is a root object. - objid = fcbRef->ObjidFDP(); + objid = pfcb->ObjidFDP(); Call( ErrCATGetObjidMetadata( ppib, ifmp, objid, &objidTable, &sysobj ) ); fRootObject = ( sysobj == sysobjTable ); Assert( !!fRootObject == ( objid == objidTable ) ); - } + + // Close primitive cursor. 
+ BTClose( pfucb ); + pfucb = pfucbNil; + pfcb = pfcbNil; if ( objid == pfmp->ObjidExtentPageCountCacheFDP() ) { diff --git a/dev/ese/src/ese/dbtask.cxx b/dev/ese/src/ese/dbtask.cxx index 8d7c4ea7..8a505eb2 100644 --- a/dev/ese/src/ese/dbtask.cxx +++ b/dev/ese/src/ese/dbtask.cxx @@ -1099,10 +1099,8 @@ VOID BATCHRECTASK::PrereadTaskBookmarks( PIB * const ppib, const INT itaskStart, BOOKMARK * rgbm; if( NULL != ( rgbm = new BOOKMARK[cbookmarksPreread] ) ) { - FCBRef fcbRef; FUCB * pfucb = pfucbNil; - if ( JET_errSuccess == ErrFILEFcbGet( ppib, m_ifmp, m_pgnoFDP, objidNil, fcbRef ) && - JET_errSuccess == ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ) + if( JET_errSuccess == ErrDIROpen( ppib, m_pgnoFDP, m_ifmp, &pfucb ) ) { PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortRecTask ); diff --git a/dev/ese/src/ese/dbutil.cxx b/dev/ese/src/ese/dbutil.cxx index ff195e60..dc1ac32b 100644 --- a/dev/ese/src/ese/dbutil.cxx +++ b/dev/ese/src/ese/dbutil.cxx @@ -3026,7 +3026,6 @@ LOCAL ERR ErrEnumDataNodes( ) { ERR err; - FCBRef fcbRef; FUCB *pfucb = pfucbNil; BOOL fForceInit = fFalse; DIB dib; @@ -3034,32 +3033,31 @@ LOCAL ERR ErrEnumDataNodes( PGNO pgnoLastSeen = pgnoNull; CPG cpgSeen = 0; - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidNil, fcbRef ) ); - CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); + CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); Assert( pfucbNil != pfucb ); - Assert( pfcbNil != fcbRef.get() ); + Assert( pfcbNil != pfucb->u.pfcb ); // This is shamelessly stolen from the space enumeration/printing code // that also walks a B-Tree during eseutil /ms, but its not clear if how // much of what we were doing here is goodness ... I read the lifecycle // of a FCB/FUCB doc, but it was still not clear. Haha, just kidding, I // would've read such a doc if it existed. 
- if ( !fcbRef->FInitialized() ) + if ( !pfucb->u.pfcb->FInitialized() ) { Assert( pgnoSystemRoot != pgnoFDP ); Assert( pgnoFDPMSO != pgnoFDP ); Assert( pgnoFDPMSO_NameIndex != pgnoFDP ); Assert( pgnoFDPMSO_RootObjectIndex != pgnoFDP ); - Assert( fcbRef->WRefCount() == 2 ); + Assert( pfucb->u.pfcb->WRefCount() == 1 ); - fcbRef->Lock(); + pfucb->u.pfcb->Lock(); // must force FCB to initialized state to allow SPGetInfo() to // open more cursors on the FCB -- this is safe because no // other thread should be opening this FCB - fcbRef->CreateComplete(); + pfucb->u.pfcb->CreateComplete(); - fcbRef->Unlock(); + pfucb->u.pfcb->Unlock(); fForceInit = fTrue; } @@ -3073,8 +3071,8 @@ LOCAL ERR ErrEnumDataNodes( BTUp( pfucb ); - if ( fcbRef->FPrimaryIndex() || - fcbRef->FTypeLV() || + if ( pfucb->u.pfcb->FPrimaryIndex() || + pfucb->u.pfcb->FTypeLV() || FFUCBSpace( pfucb ) ) { // we will be traversing the entire tree in order, preread all the pages @@ -3133,15 +3131,15 @@ LOCAL ERR ErrEnumDataNodes( if ( fForceInit ) { - Assert( fcbRef->WRefCount() == 2 ); + Assert( pfucb->u.pfcb->WRefCount() == 1 ); - fcbRef->Lock(); + pfucb->u.pfcb->Lock(); // force the FCB to be uninitialized so it will be purged by BTClose - fcbRef->CreateCompleteErr( errFCBUnusable ); + pfucb->u.pfcb->CreateCompleteErr( errFCBUnusable ); - fcbRef->Unlock(); + pfucb->u.pfcb->Unlock(); } BTClose( pfucb ); @@ -3233,13 +3231,11 @@ LOCAL ERR ErrDBUTLGetSpaceTreeInfo( CPRINTF * const pcprintf ) { ERR err; - FCBRef fcbRef; FUCB *pfucb = pfucbNil; BOOL fForceInit = fFalse; CPG rgcpgExtent[4]; - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, objidFDP, fcbRef ) ); - CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); + CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb ) ); Assert( pfucbNil != pfucb ); Assert( pfcbNil != pfucb->u.pfcb ); @@ -3251,7 +3247,7 @@ LOCAL ERR ErrDBUTLGetSpaceTreeInfo( Assert( pgnoFDPMSO != pgnoFDP ); Assert( pgnoFDPMSO_NameIndex != pgnoFDP ); Assert( pgnoFDPMSO_RootObjectIndex != pgnoFDP ); - Assert( 
pfucb->u.pfcb->WRefCount() == 2 ); // +1 for fcbRef, +1 for pfucb + Assert( pfucb->u.pfcb->WRefCount() == 1 ); pfucb->u.pfcb->Lock(); @@ -3409,15 +3405,15 @@ LOCAL ERR ErrDBUTLGetSpaceTreeInfo( if ( fForceInit ) { - Assert( fcbRef->WRefCount() == 2 ); + Assert( pfucb->u.pfcb->WRefCount() == 1 ); - fcbRef->Lock(); + pfucb->u.pfcb->Lock(); - // force the FCB to be uninitialized so it will be purged by FCBRef deleter + // force the FCB to be uninitialized so it will be purged by BTClose - fcbRef->CreateCompleteErr( errFCBUnusable ); + pfucb->u.pfcb->CreateCompleteErr( errFCBUnusable ); - fcbRef->Unlock(); + pfucb->u.pfcb->Unlock(); } BTClose( pfucb ); @@ -5266,7 +5262,6 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) FUCB* pfucbSpaceTree = pfucbNil; FUCB* pfucbTable = pfucbNil; FUCB* pfucb = pfucbNil; - FCBRef fcbRef; szContext = "CheckInTrx"; if ( ppib->Level() > 0 ) @@ -5348,10 +5343,16 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) { // We are probably racing with table deletion. FCBStateFlags fcbsf = fcbsfNone; - FCBRef fcbRefLast( FCB::PfcbFCBGet( ifmp, pgnoFDPLast, &fcbsf, fTrue /* fIncrementRefCount */ ) ); - const BOOL fFoundFcb = ( fcbRefLast.get() != pfcbNil && fcbRefLast->ObjidFDP() == objidLast ); - const BOOL fDeletePending = fFoundFcb && ( fcbsf & fcbsfDeletePending ); - fcbRefLast.reset(); + OBJID objidFcb = objidNil; + const BOOL fFoundFcb = ( FCB::PfcbFCBGet( + ifmp, + pgnoFDPLast, + &fcbsf, + fFalse, // fIncrementRefCount + fTrue, // fInitForRecovery + &objidFcb ) != pfcbNil ) && + ( objidFcb == objidLast ); + const BOOL fDeletePending = fFoundFcb && ( ( fcbsf & fcbsfDeletePending ) != 0 ); if ( fFoundFcb ) { @@ -5475,13 +5476,10 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp ) szContext = "RootSpace"; // Open root. 
- Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - Assert( fcbRef->FInitialized() ); - Assert( fcbRef->FSpaceInitialized() ); - - Call( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrBTIOpen( ppib, ifmp, pgnoSystemRoot, objidNil, openNormal, &pfucb, fFalse ) ); Call( ErrBTIGotoRoot( pfucb, latchReadNoTouch ) ); pfucb->pcsrRoot = Pcsr( pfucb ); + Assert( pfucb->u.pfcb->FSpaceInitialized() ); // Root object. CPG rgcpgRootInfo[ 4 ] = { cpgNil }; diff --git a/dev/ese/src/ese/dir.cxx b/dev/ese/src/ese/dir.cxx index 3a590ae8..a98b12a4 100644 --- a/dev/ese/src/ese/dir.cxx +++ b/dev/ese/src/ese/dir.cxx @@ -207,6 +207,63 @@ ERR ErrDIRCreateDirectory( // Open/Close routines // +// opens a cursor on given ifmp, pgnoFDP +// +ERR ErrDIROpen( PIB *ppib, PGNO pgnoFDP, IFMP ifmp, FUCB **ppfucb, BOOL fWillInitFCB ) +{ + ERR err; + FUCB *pfucb; + + CheckPIB( ppib ); + +#ifdef DEBUG + INST *pinst = PinstFromPpib( ppib ); + if ( !pinst->FRecovering() + && pinst->m_fSTInit == fSTInitDone + && !Ptls()->FIsTaskThread() + && !Ptls()->fIsRCECleanup ) + { + CheckDBID( ppib, ifmp ); + } +#endif + + CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb, openNormal, fWillInitFCB ) ); + DIRInitOpenedCursor( pfucb, pfucb->ppib->Level() ); + + // set return pfucb + // + *ppfucb = pfucb; + return JET_errSuccess; +} + +// open cursor, don't touch root page +ERR ErrDIROpenNoTouch( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, BOOL fUnique, FUCB **ppfucb, BOOL fWillInitFCB ) +{ + ERR err; + FUCB *pfucb; + + CheckPIB( ppib ); + +#ifdef DEBUG + INST *pinst = PinstFromPpib( ppib ); + if ( !pinst->FRecovering() + && pinst->m_fSTInit == fSTInitDone + && !Ptls()->FIsTaskThread() + && !Ptls()->fIsRCECleanup ) + { + CheckDBID( ppib, ifmp ); + } +#endif + + CallR( ErrBTOpenNoTouch( ppib, ifmp, pgnoFDP, objidFDP, fUnique, &pfucb, fWillInitFCB ) ); + DIRInitOpenedCursor( pfucb, pfucb->ppib->Level() ); + + // set return pfucb + // + *ppfucb = pfucb; + return JET_errSuccess; +} 
+ // open cursor on given FCB // ERR ErrDIROpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb ) diff --git a/dev/ese/src/ese/esedump.cxx b/dev/ese/src/ese/esedump.cxx index 4e1cd325..4ed03afa 100644 --- a/dev/ese/src/ese/esedump.cxx +++ b/dev/ese/src/ese/esedump.cxx @@ -361,7 +361,8 @@ LOCAL ERR ErrESEDUMPIndexForOneTable(FUCB *pfucbTable, JET_GRBIT grbitESEDUMPMod // open it Call (ErrDIROpen( pfucbTable->ppib, - pfcb, + pfcb->PgnoFDP(), + pfucbTable->ifmp, &pfucbIndex )); Assert(pfucbIndex != pfucbNil ); @@ -726,14 +727,15 @@ LOCAL ERR ErrESEDUMPCheckAndDumpSpaceInfo(FUCB *pfucb, JET_GRBIT grbitESEDUMPMod // then call the space info function for this page LOCAL ERR ErrESEDUMPDatabaseInfo(PIB *ppib, IFMP ifmp, JET_GRBIT grbitESEDUMPMode) { - FCBRef fcbRef; FUCB *pfucbDb = pfucbNil; ERR err = JET_errSuccess; // open the table - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - CallR( ErrDIROpen( ppib, fcbRef.get(), &pfucbDb ) ); - + CallR (ErrDIROpen( + ppib, + pgnoSystemRoot, + ifmp, + &pfucbDb )); Assert( pfucbNil != pfucbDb ); Assert( pfcbNil != pfucbDb->u.pfcb ); Assert( pgnoSystemRoot == pfucbDb->u.pfcb->PgnoFDP()); diff --git a/dev/ese/src/ese/fcb.cxx b/dev/ese/src/ese/fcb.cxx index 91a10247..cd866ad5 100644 --- a/dev/ese/src/ese/fcb.cxx +++ b/dev/ese/src/ese/fcb.cxx @@ -734,11 +734,12 @@ VOID FCB::UnlinkIDB( FCB *pfcbTable ) // NOTE: this is the proper channel for accessing an FCB; it uses the locking // protocol setup by the FCB hash-table and FCB latch -FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf, const BOOL fIncrementRefCount ) +FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf, const BOOL fIncrementRefCount, const BOOL fInitForRecovery, OBJID* const pobjid ) { FCBStateFlags fcbsf = fcbsfNone; INST *pinst = PinstFromIfmp( ifmp ); FCB *pfcbT; + OBJID objid = objidNil; FCBHash::ERR errFCBHash; FCBHash::CLock lockFCBHash; FCBHashKey keyFCBHash( ifmp, pgnoFDP 
); @@ -798,10 +799,6 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const // using the existing abstraction. But I don't think it's as safe, since // I think that when we try to acquire the exclusive latch, we register // ourselves as next for the latch. - // SOMEONE: This can be simplified by not requiring a write latch on the fcb for refcount modification. - // If the refcount is interlocked incremented while holding the fcbhash latch, - // it will give the same lifetime guarantees for the fcb as currently implemented. - // Note that it can be interlocked decremented any time without holding the fcbhash latch (just like today). if ( pfcbT->FNeedLock_() ) { CSXWLatch::ERR errSXWLatch = pfcbT->m_sxwl.ErrAcquireExclusiveLatch(); @@ -885,6 +882,41 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const Assert( fcbsf == fcbsfNone ); fcbsf |= fcbsfInitialized; fcbsf |= ( pfcbT->FDeletePending() ? fcbsfDeletePending : fcbsfNone ); + objid = pfcbT->ObjidFDP(); + + // If this is the dummy FCB created by recovery, we need to fully populate + // it, make sure that the others wait while the first person finishes doing it + + if ( pfcbT != pfcbNil && !fInitForRecovery && pfcbT->FInitedForRecovery() ) + { + if ( !pfcbT->FDoingAdditionalInitializationDuringRecovery() ) + { + Assert( pfcbT->IsLocked_( LOCK_TYPE::ltWrite ) ); + pfcbT->SetDoingAdditionalInitializationDuringRecovery(); + } + else + { + // release write latch + pfcbT->Unlock_( LOCK_TYPE::ltWrite ); + + // FCB is not finished initializing + // update performance counter + + PERFOpt( cFCBCacheStalls.Inc( pinst ) ); + + // wait + + UtilSleep( 10 ); + + // try to get the FCB again + + fcbsf = fcbsfNone; + objid = objidNil; + + cRetries++; + goto RetrieveFCB; + } + } if ( pfcbT != pfcbNil ) { @@ -899,11 +931,16 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const SetStateAndReturn: // set the state Assert( ( pfcbT == pfcbNil ) == ( 
fcbsf == fcbsfNone ) ); // Pointer and flag must agree. + Assert( ( pfcbT == pfcbNil ) == ( objid == objidNil ) ); // Pointer and OBJID must agree. Assert( ( fcbsf == fcbsfNone ) || ( fcbsf & fcbsfInitialized ) ); // Can't have any flags set if it's not initialized. if ( pfcbsf ) { *pfcbsf = fcbsf; } + if ( pobjid ) + { + *pobjid = objid; + } // return the FCB Assert( ( pfcbNil == pfcbT ) || ( pfcbT->IsUnlocked_( LOCK_TYPE::ltShared ) && pfcbT->IsUnlocked_( LOCK_TYPE::ltWrite ) ) ); @@ -911,48 +948,6 @@ FCB *FCB::PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const } -// ========================================================================= -// FCB Init during recovery support. - -// Acquires mskFCBDoingAdditionalInitializationDuringRecovery locklessly. -// Returns fTrue for the first thread through, fFalse otherwise. -VOID FCB::AcquireAdditionalInitDuringRecovery() -{ - INST* pinst = PinstFromIfmp( Ifmp() ); - - for ( INT cRetries = 0; true; cRetries++ ) - { - // This could've been done locklessly but it is pointless. - // If another thread acquires this flag, then it is initializing the fcb under the write lock (a potentially heavy operation). - // We don't gain anything by spinning the cpu during that time. Better to wait for the write latch. 
- - Lock_( LOCK_TYPE::ltWrite ); - if ( !( m_ulFCBFlags & mskFCBDoingAdditionalInitializationDuringRecovery ) ) - { - m_ulFCBFlags |= mskFCBDoingAdditionalInitializationDuringRecovery; - Unlock_( LOCK_TYPE::ltWrite ); - return; - } - - Unlock_( LOCK_TYPE::ltWrite ); - - // Someone else is initializing the fcb - // update performance counter - PERFOpt( cFCBCacheStalls.Inc( pinst ) ); - AssertTrack( cRetries != 100000, "TooManyAdditionalInitDuringRecoveryRetries" ); - - // wait - UtilSleep( 10 ); - } -} - -VOID FCB::ReleaseAdditionalInitDuringRecovery() -{ - Assert( IsLocked_( LOCK_TYPE::ltWrite ) ); - m_ulFCBFlags &= ( ~mskFCBDoingAdditionalInitializationDuringRecovery ); -} - - // create a new FCB // // this function allocates an FCB and possibly recycles unused FCBs for later @@ -1178,7 +1173,7 @@ BOOL FCB::FScanAndPurge_( PERFOpt( cFCBAsyncScan.Inc( pinst, tce ) ); - if ( pfcbToPurge->FCheckFreeAndPurge_( fThreshold ) ) + if ( pfcbToPurge->FCheckFreeAndPurge_( ppib, fThreshold ) ) { // pfcbPurge is now gone. @@ -1405,9 +1400,10 @@ enum FCBPurgeFailReason : BYTE // fcbpfr // etc... 
(everything that makes it free), we can purge the FCB BOOL FCB::FCheckFreeAndPurge_( + _In_ PIB *ppib, _In_ const BOOL fThreshold ) { - INST *pinst = PinstFromIfmp( Ifmp() ); + INST *pinst = PinstFromPpib( ppib ); Assert( pinst->m_critFCBList.FOwner() ); Assert( IsUnlocked_( LOCK_TYPE::ltShared ) ); @@ -1453,6 +1449,11 @@ BOOL FCB::FCheckFreeAndPurge_( fFCBPossiblyFree = fFalse; fcbpfr = fcbpfrDeletePending; } + else if ( FDomainDenyRead( ppib ) ) + { + fFCBPossiblyFree = fFalse; + fcbpfr = fcbpfrDomainDenyRead; + } else if ( FOutstandingVersions_() ) { fFCBPossiblyFree = fFalse; @@ -1470,8 +1471,6 @@ BOOL FCB::FCheckFreeAndPurge_( } else { - EnforceSz( m_crefDomainDenyRead == 0, "FCBPurge_BadDenyReadRef" ); - EnforceSz( m_crefDomainDenyWrite == 0, "FCBPurge_BadDenyWriteRef" ); fFCBPossiblyFree = fTrue; } @@ -3169,9 +3168,9 @@ VOID FCBAssertAllClean( INST *pinst ) // under the assumption that the FCB you are refcounting will // not suddenly disappear (e.g. you own a cursor on it or know // for a fact that someone else does and they will not close it) -VOID FCB::IncrementRefCount( BOOL fOwnWriteLock /* = fFalse */ ) +VOID FCB::IncrementRefCount() { - IncrementRefCount_( fOwnWriteLock ); + IncrementRefCount_( fFalse ); } VOID FCB::IncrementRefCount_( BOOL fOwnWriteLock ) @@ -3286,13 +3285,13 @@ VOID FCB::DecrementRefCountAndUnlink_( FUCB *pfucb, const BOOL fLockList, const Unlock_( LOCK_TYPE::ltWrite ); - if ( fTryPurge && FTryPurgeOnClose() ) + if ( fTryPurge && ( pfucbNil != pfucb ) && FTryPurgeOnClose() ) { // We unlinked an FUCB from a table, and it was the last thing with // a refcount on the table. Try to purge the FCB. If we succeed, // it has to be the last reference to "this", as it may have // been purged. 
- BOOL fPurgeable = FCheckFreeAndPurge_( fFalse ); + BOOL fPurgeable = FCheckFreeAndPurge_( pfucb->ppib, fFalse ); if ( fPurgeable ) { diff --git a/dev/ese/src/ese/fcreate.cxx b/dev/ese/src/ese/fcreate.cxx index d2090bdc..fada7fb9 100644 --- a/dev/ese/src/ese/fcreate.cxx +++ b/dev/ese/src/ese/fcreate.cxx @@ -1856,7 +1856,6 @@ LOCAL ERR ErrFILEICreateIndexes( FCB * pfcbTemplateTable ) { ERR err = JET_errSuccess; - FCBRef fcbRefTableExtent; FUCB *pfucbTableExtent = pfucbNil; FUCB *pfucbCatalog = pfucbNil; CHAR szIndexName[ JET_cbNameMost+1 ]; @@ -1895,22 +1894,20 @@ LOCAL ERR ErrFILEICreateIndexes( } // Open cursor for space navigation - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoTableFDP, objidTable, fcbRefTableExtent ) ); - Assert( !fcbRefTableExtent->FInitialized() ); - - // force the FCB to be initialized successfully - - fcbRefTableExtent->Lock(); - fcbRefTableExtent->SetTypeTable(); - fcbRefTableExtent->CreateComplete(); - fcbRefTableExtent->Unlock(); - - CallR( ErrDIROpen( ppib, fcbRefTableExtent.get(), &pfucbTableExtent ) ); + CallR( ErrDIROpen( ppib, pgnoTableFDP, ifmp, &pfucbTableExtent ) ); Assert( pfucbNil != pfucbTableExtent ); Assert( !FFUCBVersioned( pfucbTableExtent ) ); // Verify won't be deferred closed. 
Assert( pfcbNil != pfucbTableExtent->u.pfcb ); + Assert( !pfucbTableExtent->u.pfcb->FInitialized() ); Assert( pfucbTableExtent->u.pfcb->Pidb() == pidbNil ); + // force the FCB to be initialized successfully + + pfucbTableExtent->u.pfcb->Lock(); + pfucbTableExtent->u.pfcb->SetTypeTable(); + pfucbTableExtent->u.pfcb->CreateComplete(); + pfucbTableExtent->u.pfcb->Unlock(); + Call( ErrCATOpen( ppib, ifmp, &pfucbCatalog ) ); Assert( pfucbNil != pfucbCatalog ); @@ -2249,9 +2246,9 @@ LOCAL ERR ErrFILEICreateIndexes( } Assert( pfucbTableExtent != pfucbNil ); - Assert( pfucbTableExtent->u.pfcb->WRefCount() == 2 ); // +1 for fcbRef, +1 for open fucb + Assert( pfucbTableExtent->u.pfcb->WRefCount() == 1 ); - // force the FCB to be uninitialized so it will be purged by the FCBRef .dtor + // force the FCB to be uninitialized so it will be purged by DIRClose pfucbTableExtent->u.pfcb->Lock(); pfucbTableExtent->u.pfcb->CreateCompleteErr( errFCBUnusable ); @@ -2286,7 +2283,6 @@ LOCAL ERR ErrFILEIInheritIndexes( FCB * pfcbTemplateTable ) { ERR err = JET_errSuccess; - FCBRef fcbRefTableExtent; FUCB *pfucbTableExtent = pfucbNil; FUCB *pfucbCatalog = pfucbNil; TDB *ptdbTemplateTable; @@ -2309,8 +2305,7 @@ LOCAL ERR ErrFILEIInheritIndexes( Assert( pfcbTemplateTable->FTemplateTable() ); // Open cursor for space navigation - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoTableFDP, objidTable, fcbRefTableExtent ) ); - CallR( ErrDIROpen( ppib, fcbRefTableExtent.get(), &pfucbTableExtent ) ); + CallR( ErrDIROpen( ppib, pgnoTableFDP, ifmp, &pfucbTableExtent ) ); Assert( pfucbNil != pfucbTableExtent ); Assert( !FFUCBVersioned( pfucbTableExtent ) ); // Verify won't be deferred closed. 
Assert( pfcbNil != pfucbTableExtent->u.pfcb ); @@ -2423,9 +2418,9 @@ LOCAL ERR ErrFILEIInheritIndexes( } Assert( pfucbTableExtent != pfucbNil ); - Assert( pfucbTableExtent->u.pfcb->WRefCount() == 2 ); // +1 for fcbRef, +1 for pfucbTableExtent + Assert( pfucbTableExtent->u.pfcb->WRefCount() == 1 ); - // force the FCB to be uninitialized so it will be purged by fcbRefTableExtent .dtor + // force the FCB to be uninitialized so it will be purged by DIRClose pfucbTableExtent->u.pfcb->Lock(); pfucbTableExtent->u.pfcb->CreateCompleteErr( errFCBUnusable ); @@ -2781,10 +2776,7 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, // allocate cursor // - { - FCBRef fcbRef; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); Call( ErrDIRCreateDirectory( pfucb, CpgInitial( &jsphPrimaryAlloc, g_rgfmp[ ifmp ].CbPage() ), @@ -2794,7 +2786,6 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, fSPFlags | ( FFMPIsTempDB( ifmp ) ? fSPUnversionedExtent : 0 ) ) ); // For temp. 
tables, create unversioned extents DIRClose( pfucb ); pfucb = pfucbNil; - } Assert( ptablecreate->cCreated == 0 ); ptablecreate->cCreated = 1; @@ -6577,7 +6568,7 @@ LOCAL ERR ErrFILEIUpdateIndex( PIB *ppib, FUCB *pfucbTable, FUCB *pfucbIndex ) LOCAL ERR ErrFILEIPrepareOneIndex( PIB * const ppib, FUCB * const pfucbTable, - FCBRef * poutFcbRefIdx, + FUCB ** ppfucbIdx, JET_INDEXCREATE3_A * const pidxcreate, const CHAR * const szIndexName, const CHAR * rgszColumns[], @@ -6589,7 +6580,6 @@ LOCAL ERR ErrFILEIPrepareOneIndex( const IFMP ifmp = pfucbTable->ifmp; FCB * const pfcb = pfucbTable->u.pfcb; FCB * pfcbIdx = pfcbNil; - FCBRef fcbRefIdx; PGNO pgnoIndexFDP; OBJID objidIndex; FIELD * pfield; @@ -6892,6 +6882,7 @@ LOCAL ERR ErrFILEIPrepareOneIndex( // DIRBeforeFirst( pfucbTable ); + Assert( pfucbNil == *ppfucbIdx ); Assert( pfcbIdx == pfcbNil ); } else @@ -6914,8 +6905,10 @@ LOCAL ERR ErrFILEIPrepareOneIndex( // get pfcb of index directory // - Call( ErrFILEFcbGet( ppib, ifmp, pgnoIndexFDP, objidIndex, fcbRefIdx ) ); - pfcbIdx = fcbRefIdx.get(); + Call( ErrDIROpen( ppib, pgnoIndexFDP, ifmp, ppfucbIdx ) ); + Assert( *ppfucbIdx != pfucbNil ); + Assert( !FFUCBVersioned( *ppfucbIdx ) ); // Verify won't be deferred closed. 
+ pfcbIdx = (*ppfucbIdx)->u.pfcb; Assert( !pfcbIdx->FInitialized() ); Assert( pfcbIdx->Pidb() == pidbNil ); @@ -6969,6 +6962,10 @@ LOCAL ERR ErrFILEIPrepareOneIndex( pfcbIdx->Lock(); pfcbIdx->CreateCompleteErr( errFCBUnusable ); pfcbIdx->Unlock(); + + // verify that the FUCB will not be defer-closed + + Assert( !FFUCBVersioned( *ppfucbIdx ) ); } goto HandleError; } @@ -6988,9 +6985,6 @@ LOCAL ERR ErrFILEIPrepareOneIndex( rgidxsegConditional, pspacehints ) ); - // Return fcb to caller - *poutFcbRefIdx = std::move( fcbRefIdx ); - HandleError: if ( fCleanupIDB ) { @@ -7061,7 +7055,6 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( FUCB * pfucb = pfucbNil; FUCB * pfucbIdx = pfucbNil; FCB * const pfcb = pfucbTable->u.pfcb; - FCBRef fcbRefIdx; FCB * pfcbIdx = pfcbNil; IDB idb( pinst ); CHAR szIndexName[ JET_cbNameMost+1 ]; @@ -7219,7 +7212,7 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( Call( ErrFILEIPrepareOneIndex( ppib, pfucb, - &fcbRefIdx, + &pfucbIdx, pidxcreate, szIndexName, rgszColumns, @@ -7230,14 +7223,14 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( if ( fPrimary ) { Assert( pfucbNil == pfucbIdx ); - Assert( pfcbNil == fcbRefIdx.get() ); Assert( pfcbNil == pfcbIdx ); } else { - Assert( pfcbNil != fcbRefIdx.get() ); - pfcbIdx = fcbRefIdx.get(); + Assert( pfucbNil != pfucbIdx ); + pfcbIdx = pfucbIdx->u.pfcb; + Assert( pfcbNil != pfcbIdx ); Assert( pfcbIdx->FTypeSecondaryIndex() ); if ( pidxcreate->grbit & JET_bitIndexEmpty ) @@ -7284,7 +7277,6 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( const IDBFLAG idbflagPersisted = idb.FPersistedFlags(); const IDXFLAG idbflagPersistedX = idb.FPersistedFlagsX(); - Call( ErrDIROpen( ppib, pfcbIdx, &pfucbIdx ) ); FUCBSetIndex( pfucbIdx ); FUCBSetSecondary( pfucbIdx ); @@ -7471,17 +7463,15 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( pfcbIdx->Pidb()->FPersistedFlags(), pfcbIdx->Pidb()->FPersistedFlagsX() ) ); } - - // FCB now linked into table's index list, which guarantees that - // it will be available at Commit/Rollback time, so we can dispose - // of the 
index cursor. - Assert( pfucbNil != pfucbIdx ); - Assert( !FFUCBVersioned( pfucbIdx ) ); // no versioned operations should have occurred on this cursor - DIRClose( pfucbIdx ); - pfucbIdx = pfucbNil; - fcbRefIdx.reset(); - pfcbIdx = pfcbNil; } + + // FCB now linked into table's index list, which guarantees that + // it will be available at Commit/Rollback time, so we can dispose + // of the index cursor. + Assert( !FFUCBVersioned( pfucbIdx ) ); // no versioned operations should have occurred on this cursor + Assert( pfucbNil != pfucbIdx ); + DIRClose( pfucbIdx ); + pfucbIdx = pfucbNil; } Call( ErrDIRCommitTransaction( ppib, ( pidxcreate->grbit & JET_bitIndexLazyFlush ) ? JET_bitCommitLazyFlush : 0 ) ); @@ -7519,7 +7509,6 @@ LOCAL ERR VTAPI ErrFILEICreateIndex( DIRClose( pfucbIdx ); } - fcbRefIdx.reset(); // rollback has to purge the fcb, no refs should remain CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); DIRClose( pfucb ); @@ -7589,7 +7578,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( BOOL fInTransaction = fFalse; BOOL fLazyCommit = fTrue; ULONG iindex; - FCBRef* rgFcbRef = NULL; + FUCB ** rgpfucbIdx = NULL; JET_INDEXCREATE3_A *pidxcreateT = NULL; JET_INDEXCREATE3_A *pidxcreateNext = NULL; @@ -7644,10 +7633,11 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( return JET_errSuccess; } - AllocR( rgFcbRef = new FCBRef[ cIndexes ] ); + AllocR( rgpfucbIdx = (FUCB **)PvOSMemoryHeapAlloc( sizeof(FUCB *) * cIndexes ) ); + memset( rgpfucbIdx, 0, sizeof(FUCB *) * cIndexes ); // Temporarily open new table cursor. 
- Call( ErrDIROpen( ppib, pfcb, &pfucb ) ); + CallJ( ErrDIROpen( ppib, pfcb, &pfucb ), Cleanup ); FUCBSetIndex( pfucb ); FUCBSetMayCacheLVCursor( pfucb ); @@ -7721,7 +7711,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( Call( ErrFILEIPrepareOneIndex( ppib, pfucb, - &rgFcbRef[ iindex ], + &rgpfucbIdx[iindex], pidxcreateT, szIndexName, rgszColumns, @@ -7729,7 +7719,7 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( &idb, &jsphIndex ) ); - pfcbIndexT = rgFcbRef[ iindex ].get(); + pfcbIndexT = rgpfucbIdx[iindex]->u.pfcb; Assert( pfcbIndexT->FTypeSecondaryIndex() ); pfcbIndexT->SetPfcbNextIndex( pfcbIndexes ); pfcbIndexT->SetPfcbTable( pfcb ); @@ -7746,10 +7736,10 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( pfcb->EnterDDL(); - Assert( pfcbNil == rgFcbRef[ 0 ]->PfcbNextIndex() ); + Assert( pfcbNil == rgpfucbIdx[0]->u.pfcb->PfcbNextIndex() ); Assert( cIndexes > 0 ); - Assert( pfcbIndexes == rgFcbRef[ cIndexes - 1 ].get() ); - rgFcbRef[ 0 ]->SetPfcbNextIndex( pfcb->PfcbNextIndex() ); + Assert( pfcbIndexes == rgpfucbIdx[cIndexes-1]->u.pfcb ); + rgpfucbIdx[0]->u.pfcb->SetPfcbNextIndex( pfcb->PfcbNextIndex() ); pfcb->SetPfcbNextIndex( pfcbIndexes ); FILESetAllIndexMask( pfcb ); @@ -7758,7 +7748,10 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( for ( iindex = 0; iindex < cIndexes; iindex++ ) { - FCB* const pfcbIndexT = rgFcbRef[ iindex ].get(); + FUCB * const pfucbIndexT = rgpfucbIdx[iindex]; + Assert( pfucbNil != pfucbIndexT ); + + FCB * const pfcbIndexT = pfucbIndexT->u.pfcb; Assert( pfcbNil != pfcbIndexT ); Assert( pfcbIndexT->FTypeSecondaryIndex() ); @@ -7766,9 +7759,11 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( pfcbIndexT->Lock(); pfcbIndexT->ResetDontLogSpaceOps(); pfcbIndexT->Unlock(); + + Assert( !FFUCBVersioned( pfucbIndexT ) ); // No versioned operations should have been performed, so won't be defer-closed. + DIRClose( pfucbIndexT ); } - delete[] rgFcbRef; Call( ErrDIRCommitTransaction( ppib, fLazyCommit ? 
JET_bitCommitLazyFlush : 0 ) ); fInTransaction = fFalse; @@ -7780,16 +7775,30 @@ LOCAL ERR VTAPI ErrFILEIBatchCreateIndex( { Assert( err < 0 ); // Must have hit an error. - delete[] rgFcbRef; + if ( NULL != rgpfucbIdx ) + { + for ( iindex = 0; iindex < cIndexes; iindex++ ) + { + if ( pfucbNil != rgpfucbIdx[iindex] ) + { + Assert( !FFUCBVersioned( rgpfucbIdx[iindex] ) ); // No versioned operations should have been performed, so won't be defer-closed. + DIRClose( rgpfucbIdx[iindex] ); + } + } + } + CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); } - if ( pfucb != pfucbNil ) + DIRClose( pfucb ); + AssertDIRNoLatch( ppib ); + +Cleanup: + if ( NULL != rgpfucbIdx ) { - DIRClose( pfucb ); + OSMemoryHeapFree( rgpfucbIdx ); } - AssertDIRNoLatch( ppib ); return err; } @@ -8559,7 +8568,6 @@ ERR ErrFILEDeleteTable( PIB *ppib, IFMP ifmp, const CHAR *szName, const BOOL fAl FUCB *pfucb = pfucbNil; FUCB *pfucbParent = pfucbNil; FCB *pfcb = pfcbNil; - FCBRef fcbRefParent; OBJID objidTable; CHAR szTable[JET_cbNameMost+1]; BOOL fInUseBySystem; @@ -8608,8 +8616,7 @@ ERR ErrFILEDeleteTable( PIB *ppib, IFMP ifmp, const CHAR *szName, const BOOL fAl // open cursor on database and seek to table without locking // - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefParent ) ); - Call( ErrDIROpen( ppib, fcbRefParent.get(), &pfucbParent ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbParent ) ); { JET_GRBIT grbitOpen = JET_bitTableDelete | JET_bitTableDenyRead; diff --git a/dev/ese/src/ese/fileopen.cxx b/dev/ese/src/ese/fileopen.cxx index 882fc15f..d726dc61 100644 --- a/dev/ese/src/ese/fileopen.cxx +++ b/dev/ese/src/ese/fileopen.cxx @@ -1420,14 +1420,12 @@ ERR ErrFILEIOpenTable( ERR err; ERR wrnSurvives = JET_errSuccess; FUCB *pfucb = pfucbNil; - FCB *pfcb = pfcbNil; - FCBRef fcbRef; + FCB *pfcb; CHAR szTable[JET_cbNameMost+1]; PGNO pgnoFDP = pgnoNull; OBJID objidTable = objidNil; BOOL fInTransaction = fFalse; BOOL fInitialisedCursor = fFalse; - BOOL 
fAddlInitDuringRecovery = fFalse; TABLECLASS tableclass = tableclassNone; Assert( ppib != ppibNil ); @@ -1619,8 +1617,7 @@ ERR ErrFILEIOpenTable( Assert( objidNil != objidTable ); Assert( objidTable > objidSystemRoot ); - Call( ErrFILEFcbGetNoTouch( ppib, ifmp, pgnoFDP, objidTable, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpenNoTouch( ppib, ifmp, pgnoFDP, objidTable, fTrue, &pfucb, fTrue ) ); Assert( pfucbNil != pfucb ); pfcb = pfucb->u.pfcb; @@ -1681,20 +1678,8 @@ ERR ErrFILEIOpenTable( // if we're opening after table creation, the FCB shouldn't be initialised Assert( !( grbit & JET_bitTableCreate ) || !pfcb->FInitialized() ); - if ( pfcb->FInitedForRecovery() ) - { - // If we find a partially initialized FCB (created by redo), we need to fully initialize it. - // Acquire additional init flag, which would only allow 1 thread at a time through for initialization. - // The first thread through will go in to the init block below. - Assert( pfcb->FInitialized() ); // FCB is in a quasi-initialized state - - pfcb->AcquireAdditionalInitDuringRecovery(); - fAddlInitDuringRecovery = fTrue; - } - - // Only one thread (the one that created a new FCB) could possibly get to this point with an uninitialized FCB, - // because FCB::PFcbGet() doesn't return uninitalized FCBs (it spin-waits for them to be initialized). - // Which is why we don't have to grab the FCB's critical section. + // Only one thread could possibly get to this point with an uninitialized + // FCB, which is why we don't have to grab the FCB's critical section. 
if ( !pfcb->FInitialized() || pfcb->FInitedForRecovery() ) { if ( fInTransaction ) @@ -1708,12 +1693,12 @@ ERR ErrFILEIOpenTable( switch ( ttSubject ) { case tt::System: - Call( ErrCATInitCatalogFCB( ppib, pfcb ) ); + Call( ErrCATInitCatalogFCB( pfucb ) ); break; case tt::Temp: Assert( !( grbit & JET_bitTableDelete ) ); - Call( ErrCATInitTempFCB( ppib, pfcb ) ); + Call( ErrCATInitTempFCB( pfucb ) ); break; case tt::ExtentPageCountCache: @@ -1724,7 +1709,7 @@ ERR ErrFILEIOpenTable( // initialize the table's FCB // - Call( ErrCATInitFCB( ppib, pfcb, objidTable, !( grbit & JET_bitAllowPgnoFDPLastSetTime ) ) ); + Call( ErrCATInitFCB( pfucb, objidTable, !( grbit & JET_bitAllowPgnoFDPLastSetTime ) ) ); const ULONG cPageReadAfter = Ptls()->threadstats.cPageRead; const ULONG cPagePrereadAfter = Ptls()->threadstats.cPagePreread; @@ -1791,12 +1776,6 @@ ERR ErrFILEIOpenTable( pfcb->CreateComplete(); pfcb->ResetInitedForRecovery(); - if ( fAddlInitDuringRecovery ) - { - pfcb->ReleaseAdditionalInitDuringRecovery(); - fAddlInitDuringRecovery = fFalse; - } - err = ErrFILEICheckAndSetMode( pfucb, grbit ) + ErrFaultInjection( 38304 ); if ( err >= JET_errSuccess ) @@ -1988,20 +1967,6 @@ ERR ErrFILEIOpenTable( return err; HandleError: - if ( fAddlInitDuringRecovery ) - { - Assert( err < JET_errSuccess ); - pfcb->Lock(); - - // Error duing init means that we should be leaving the FCB in a semi-initialized state, - // the same state we initially encountered. - // The next thread to acquire addl init flag will re-attempt initialization. - EnforceSz( pfcb->FInitedForRecovery(), "FCBAddlInit_BadState" ); - - pfcb->ReleaseAdditionalInitDuringRecovery(); - pfcb->Unlock(); - } - Assert( pfucbNil != pfucb || !fInitialisedCursor ); if ( pfucbNil != pfucb ) { @@ -2015,8 +1980,6 @@ ERR ErrFILEIOpenTable( } } - fcbRef.reset(); // do we need to release the FCB before trx rollback? 
- if ( fInTransaction ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); @@ -2322,132 +2285,6 @@ VOID FILETableMustRollback( PIB *ppib, FCB *pfcbTable ) } -// Latches pgnoFDP and reads objidFDP off of the PGHDR. -LOCAL ERR ErrFILEIGetObjidFromPgnoFDP( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID* pobjidFDP ) -{ - ERR err; - - if ( pgnoFDP == pgnoSystemRoot ) - { - *pobjidFDP = objidSystemRoot; - return JET_errSuccess; - } - else if ( FCATSystemTable( pgnoFDP ) ) - { - *pobjidFDP = ObjidCATTable( pgnoFDP ); - return JET_errSuccess; - } - else - { - CSR csr; - CallR( csr.ErrGetReadPage( ppib, ifmp, pgnoFDP, bflfDefault ) ); - - *pobjidFDP = csr.Cpage().ObjidFDP(); - Assert( *pobjidFDP != objidNil ); - csr.ReleasePage(); - return JET_errSuccess; // clobber warnings - } -} - - -// Creates a new FCB, and adds it to the FCB hash and the inst's FCB list. -// Sets the following essential properties on the FCB: -// - objidFDP -// Optional properties are set later in the FCB lifetime. They include: -// - Schema/metadata info (rec info, index info, unique/non-unique etc), set by ErrFILEIInitializeFCB(). -// - Space header (pgnoOE, pgnoAE), by ErrBTOpen(), or deferred initialized by space (in case of ErrBTOpenNoTouch()). -// Note that the FCB is considered ill-formed without the essential properties set. -// But may be usable in some limited form without the optional properties set (e.g. during recovery). -ERR ErrFILEIOpenFCB( - PIB *ppib, - IFMP ifmp, - PGNO pgnoFDP, - OBJID objidFDP, - OPENTYPE opentype, - FCBRef& fcbRef ) -{ - ERR err = JET_errSuccess; - FCB *pfcb = pfcbNil; - - // We are creating an FCB for a new objid, its catalog entry or pgnoFDP may not be initialized yet. - if ( opentype != openNew ) - { - if ( objidFDP == objidNil ) - { - // System tables have constant objids. - // There is no reason for the caller not to pass them in (except repair that may not have enough context). 
- Assert( !FCATBaseSystemFDP( pgnoFDP ) || g_fRepair ); - Assert( opentype != openNormalNoTouch ); // we need to latch the pgnoFDP here - - Call( ErrFILEIGetObjidFromPgnoFDP( ppib, ifmp, pgnoFDP, &objidFDP ) ); - } - } - - // Create a new FCB and add to FCB hash. - // Only 1 thread gets past this point with JET_errSuccess. - Call( FCB::ErrCreate( ppib, ifmp, pgnoFDP, &pfcb ) ); - - // the creation was successful - - Assert( pfcb->IsLocked() ); - Assert( pfcb->FTypeNull() ); // No fcbtype yet. - Assert( pfcb->Ifmp() == ifmp ); - Assert( pfcb->PgnoFDP() == pgnoFDP ); - Assert( !pfcb->FInitialized() ); - Assert( !pfcb->FSpaceInitialized() ); - Assert( pfcb->WRefCount() == 0 ); - - pfcb->SetObjidFDP( objidFDP ); - - // Increment refcount and set guard object. - pfcb->IncrementRefCount( fTrue /* fOwnWriteLock */ ); - fcbRef.reset( pfcb ); - - pfcb->Unlock(); - - if ( pgnoFDP == pgnoSystemRoot ) - { - // SPECIAL CASE: For database cursor, we've got all the - // information we need. - - // when opening db cursor, always force to check the root page - Assert( objidNil == objidFDP || objidSystemRoot == pfcb->ObjidFDP() ); - - pfcb->Lock(); - pfcb->SetTypeDatabase(); - pfcb->CreateComplete(); // fcb is marked as initialized - pfcb->Unlock(); - - // insert this FCB into the global list, as it is fully initialized - pfcb->InsertList(); - } - - // Initialize space properties. - // Some codepaths require space properties to be cached on the FCB right after creation. - - if ( opentype == openNormal ) - { - Call( ErrSPInitFCB( ppib, pfcb ) ); - } - else - { - Assert( opentype == openNew || opentype == openNormalNoTouch ); - } - - // Finish creating this FCB, non-database FCBs are not fully initialized yet. - // Caller must deal with the rest of the initialization sequence. - Assert( pfcb->WRefCount() == 1 ); - return err; - -HandleError: - // FCB creation can't race with anything else if we are creating a new object. 
- Assert( err != errFCBExists || opentype != openNew ); - - fcbRef.reset(); // return NULL - return err; -} - - ERR ErrFILEIInitializeFCB( PIB *ppib, IFMP ifmp, @@ -2474,7 +2311,6 @@ ERR ErrFILEIInitializeFCB( pfcbNew->SetPtdb( ptdb ); pfcbNew->Lock(); pfcbNew->SetPrimaryIndex(); - pfcbNew->SetUnique(); // primary index is always unique Assert( !pfcbNew->FSequentialIndex() ); if ( pidbNil == pidb ) { @@ -2493,9 +2329,6 @@ ERR ErrFILEIInitializeFCB( pfcbNew->Lock(); pfcbNew->SetTypeSecondaryIndex(); - - Assert( pidb != NULL ); - pidb->FUnique() ? pfcbNew->SetUnique() : pfcbNew->SetNonUnique(); pfcbNew->Unlock(); } @@ -2565,77 +2398,6 @@ ERR ErrFILEIInitializeFCB( } -LOCAL ERR ErrFILEIFcbGet( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, OPENTYPE opentype, FCBRef& fcbRef ) -{ - ERR err = JET_errSuccess; - FCBStateFlags fcbsf; - ULONG cRetries = 0; - -RetrieveFCB: - AssertTrack( cRetries != 100000, "TooManyFcbOpenRetries" ); - - // get the FCB for the given ifmp/pgnoFDP - - fcbRef.reset( FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* fIncrementRefcount */ ) ); - if ( fcbRef.get() == pfcbNil ) - { - // the FCB does not exist - - Assert( fcbsfNone == fcbsf ); - - // try to create a new FCB - - err = ErrFILEIOpenFCB( ppib, ifmp, pgnoFDP, objidFDP, opentype, fcbRef ); - Assert( err <= JET_errSuccess ); // Shouldn't return warnings. - - if ( err == errFCBExists ) - { - - // we failed because someone else was racing to create - // the same FCB that we want, but they beat us to it - - // try to get the FCB again - - err = JET_errSuccess; - UtilSleep( 10 ); - cRetries++; - goto RetrieveFCB; - } - Call( err ); - } - else - { - if ( fcbsf & fcbsfInitialized ) - { - Assert( fcbRef->WRefCount() >= 1); - } - else - { - FireWall( "DeprecatedSentinelFcbBtOpen" ); // Sentinel FCBs are believed deprecated - Assert( !FFMPIsTempDB( ifmp ) ); // Sentinels not used by sort/temp. tables. 
- - // If we encounter a sentinel, it means the - // table has been locked for subsequent deletion. - fcbRef.reset(); - err = ErrERRCheck( JET_errTableLocked ); - } - } - -HandleError: - return err; -} - -ERR ErrFILEFcbGet( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ) -{ - return ErrFILEIFcbGet( ppib, ifmp, pgnoFDP, objidFDP, openNormal, pfcbRef ); -} - -ERR ErrFILEFcbGetNoTouch( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ) -{ - return ErrFILEIFcbGet( ppib, ifmp, pgnoFDP, objidFDP, openNormalNoTouch, pfcbRef ); -} - - INLINE VOID RECIForceTaggedColumnsAsDerived( const TDB * const ptdb, DATA& dataDefault ) diff --git a/dev/ese/src/ese/lv.cxx b/dev/ese/src/ese/lv.cxx index dcb20912..ac45a0a6 100644 --- a/dev/ese/src/ese/lv.cxx +++ b/dev/ese/src/ese/lv.cxx @@ -519,14 +519,12 @@ INLINE ERR ErrFILEIInitLVRoot( FUCB *pfucb, const PGNO pgnoLV, FUCB **ppfucbLV ) ERR err; FCB * const pfcbTable = pfucb->u.pfcb; FCB * pfcbLV; - FCBRef fcbRefLV; // Link LV FCB into table. - CallR( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoLV, objidNil, fcbRefLV ) ); - CallR( ErrDIROpen( pfucb->ppib, fcbRefLV.get(), ppfucbLV ) ); + CallR( ErrDIROpen( pfucb->ppib, pgnoLV, pfucb->ifmp, ppfucbLV, fTrue ) ); Assert( *ppfucbLV != pfucbNil ); Assert( !FFUCBVersioned( *ppfucbLV ) ); // Verify won't be deferred closed. 
- pfcbLV = fcbRefLV.get(); + pfcbLV = (*ppfucbLV)->u.pfcb; Assert( !pfcbLV->FInitialized() || pfcbLV->FInitedForRecovery() ); Assert( pfcbLV->Ifmp() == pfucb->ifmp ); @@ -536,7 +534,6 @@ INLINE ERR ErrFILEIInitLVRoot( FUCB *pfucb, const PGNO pgnoLV, FUCB **ppfucbLV ) // Recovery creates all FCBs as table FCB, now that we know better, we need to remove from list of table FCBs // before we mark FCB as being a LV FCB - pfcbLV->AcquireAdditionalInitDuringRecovery(); // allows only 1 thread through at a time if ( pfcbLV->FInitedForRecovery() ) { pfcbLV->RemoveList(); @@ -561,12 +558,10 @@ INLINE ERR ErrFILEIInitLVRoot( FUCB *pfucb, const PGNO pgnoLV, FUCB **ppfucbLV ) } // finish the initialization of this LV FCB - // an initialized fcb isn't purged by the FCBRef deleter pfcbLV->Lock(); pfcbLV->CreateComplete(); pfcbLV->ResetInitedForRecovery(); - pfcbLV->ReleaseAdditionalInitDuringRecovery(); pfcbLV->Unlock(); // WARNING: publishing the FCB in the TDB *must* diff --git a/dev/ese/src/ese/repair.cxx b/dev/ese/src/ese/repair.cxx index 12b2fea0..f744915e 100644 --- a/dev/ese/src/ese/repair.cxx +++ b/dev/ese/src/ese/repair.cxx @@ -8216,7 +8216,6 @@ LOCAL ERR ErrREPAIRRepairGlobalSpace( const PGNO pgnoLast = PgnoLast( ifmp ); const CPG cpgOwned = PgnoLast( ifmp ) - 3; // we will insert three pages in the ErrSPCreate below - FCBRef fcbRef; FUCB *pfucb = pfucbNil; FUCB *pfucbOE = pfucbNil; @@ -8243,8 +8242,7 @@ LOCAL ERR ErrREPAIRRepairGlobalSpace( // So, don't bother adding the (objidFDP, cpgOEFDP, cpgAEFDP) triplet to the // cache. 
- Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucb ) ); // The tree has only one node so we can insert ths node without splitting Call( ErrSPIOpenOwnExt( pfucb, &pfucbOE ) ); @@ -8332,7 +8330,6 @@ LOCAL ERR ErrREPAIRDeleteCorruptedEntriesFromCatalog( // ================================================================ { ERR err = JET_errSuccess; - FCBRef fcbRefCatalog; FUCB * pfucbCatalog = pfucbNil; ENTRYINFO entryinfo; @@ -8345,8 +8342,7 @@ LOCAL ERR ErrREPAIRDeleteCorruptedEntriesFromCatalog( CallR( ErrDIRBeginTransaction( ppib, 64549, NO_GRBIT ) ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPMSO, objidFDPMSO, fcbRefCatalog ) ); - Call( ErrDIROpen( ppib, fcbRefCatalog.get(), &pfucbCatalog ) ); + Call( ErrDIROpen( ppib, pgnoFDPMSO, ifmp, &pfucbCatalog ) ); Assert( pfucbNil != pfucbCatalog ); FUCBSetIndex( pfucbCatalog ); @@ -8437,8 +8433,6 @@ LOCAL ERR ErrREPAIRDeleteCorruptedEntriesFromCatalog( DIRClose( pfucbCatalog ); } - fcbRefCatalog.reset(); // do we need to release the FCB before trx rollback? 
- if ( JET_errSuccess != err ) { CallSx( ErrDIRRollback( ppib ), JET_errRollbackError ); @@ -8487,7 +8481,6 @@ LOCAL ERR ErrREPAIRRepairCatalogs( { ERR err = JET_errSuccess; - FCBRef fcbRefParent; FUCB * pfucbParent = pfucbNil; FUCB * pfucbCatalog = pfucbNil; FUCB * pfucbShadowCatalog = pfucbNil; @@ -8500,8 +8493,7 @@ LOCAL ERR ErrREPAIRRepairCatalogs( if ( fCatalogCorrupt || fShadowCatalogCorrupt ) { // we'll need this for the space - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefParent ) ); - Call( ErrDIROpen( ppib, fcbRefParent.get(), &pfucbParent ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbParent ) ); } if ( fCatalogCorrupt && fShadowCatalogCorrupt ) @@ -9004,7 +8996,6 @@ LOCAL ERR ErrREPAIRCopyTempTableToCatalog( JET_ERR err = JET_errSuccess; - FCBRef fcbRefParent; FUCB * pfucbParent = pfucbNil; FUCB * pfucbCatalog = pfucbNil; FUCB * pfucbSpace = pfucbNil; @@ -9016,8 +9007,7 @@ LOCAL ERR ErrREPAIRCopyTempTableToCatalog( VOID * pvData = NULL; BFAlloc( bfasIndeterminate, &pvData ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefParent ) ); - Call( ErrDIROpen( ppib, fcbRefParent.get(), &pfucbParent ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbParent ) ); Assert( pfucbNil != pfucbParent ); // when we create this we cannot make all the pages available, some will be needed later @@ -9661,12 +9651,10 @@ LOCAL ERR ErrREPAIRCreateEmptyFDP( const CPG cpgMin = cpgMultipleExtentMin; CPG cpgRequest = cpgMin; - FCBRef fcbRef; FUCB * pfucb = pfucbNil; // the fucb is used to get an extent from the parent - Call( ErrFILEFcbGet( ppib, ifmp, pgnoParent, objidNil, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoParent, ifmp, &pfucb ) ); if ( pgnoNull == *ppgnoFDPNew ) { Call( ErrSPGetExt( @@ -9864,7 +9852,6 @@ LOCAL ERR ErrREPAIRRebuildSpace( CPG cpgRun = 0; FUCB *pfucbOE = pfucbNil; - FCBRef fcbRefParent; FUCB *pfucbParent = pfucbNil; const OBJID 
objidFDP = pfucb->u.pfcb->ObjidFDP(); @@ -9896,8 +9883,7 @@ LOCAL ERR ErrREPAIRRebuildSpace( Assert( pgnoNull != pgnoParent ); if ( pgnoNull != pgnoParent ) { - Call( ErrFILEFcbGet( ppib, ifmp, pgnoParent, objidNil, fcbRefParent ) ); - Call( ErrBTOpen( ppib, fcbRefParent.get(), &pfucbParent ) ); + Call( ErrBTOpen( ppib, pgnoParent, ifmp, &pfucbParent ) ); Assert( pfucbNil != pfucbParent ); Assert( pfcbNil != pfucbParent->u.pfcb ); Assert( pfucbParent->u.pfcb->FInitialized() ); @@ -10327,7 +10313,6 @@ LOCAL ERR ErrREPAIRFixLVs( { ERR err = JET_errSuccess; - FCBRef fcbRef; FUCB * pfucb = pfucbNil; BOOL fDone = fFalse; LvId lidCurr; @@ -10342,8 +10327,7 @@ LOCAL ERR ErrREPAIRFixLVs( (*popts->pcprintfVerbose)( "fixing long value tree\r\n" ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoLV, prepairtable->objidLV, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoLV, ifmp, &pfucb ) ); Assert( pfucbNil != pfucb ); // Make sure the LV FCB is properly linked to the Table FCB, needed later to look up chunk-size Assert( pfucb->u.pfcb->PfcbTable() != pfcbNil ); @@ -10424,7 +10408,7 @@ LOCAL ERR ErrREPAIRFixLVs( DIRUp( pfucb ); } - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucbLVRoot ) ); + Call( ErrDIROpen( ppib, pgnoLV, ifmp, &pfucbLVRoot ) ); (*popts->pcprintfVerbose)( "long value 0x%I64x has no root. 
creating a root with refcount %d and size %d\r\n", (_LID64)lidCurr, ulRefcount, ulSize ); @@ -10980,7 +10964,6 @@ LOCAL ERR ErrREPAIRFixRecords( { ERR err = JET_errSuccess; - FCBRef fcbRef; FUCB * pfucb = pfucbNil; INT crecordDeleted = 0; @@ -10992,9 +10975,7 @@ LOCAL ERR ErrREPAIRFixRecords( (*popts->pcprintfVerbose)( "fixing records\r\n" ); - Assert( objidNil != prepairtable->objidFDP ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDP, prepairtable->objidFDP, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoFDP, ifmp, &pfucb ) ); Assert( pfucbNil != pfucb ); FUCBSetIndex( pfucb ); @@ -11109,7 +11090,6 @@ LOCAL ERR ErrREPAIRFixLVRefcounts( //- { ERR err = JET_errSuccess; - FCBRef fcbRef; FUCB * pfucb = pfucbNil; (*popts->pcprintfVerbose)( "fixing long value refcounts\r\n" ); @@ -11122,8 +11102,7 @@ LOCAL ERR ErrREPAIRFixLVRefcounts( } Call( pttmapLVTree->ErrMoveFirst() ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoLV, prepairtable->objidLV, fcbRef ) ); - Call( ErrDIROpen( ppib, fcbRef.get(), &pfucb ) ); + Call( ErrDIROpen( ppib, pgnoLV, ifmp, &pfucb ) ); // Mark the fcb as being an LV; (see ErrFILEIInitLVRoot for // other initialization that isn't needed) diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index a23135da..d218b3a4 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -2338,13 +2338,11 @@ ERR ErrSPGetLastPgno( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ PGNO * ppg ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTINFO * pextinfo ) { ERR err; - FCBRef fcbRef; FUCB *pfucb = pfucbNil; FUCB *pfucbOE = pfucbNil; DIB dib; - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucb ) ); + CallR( ErrBTOpen( ppib, pgnoSystemRoot, ifmp, &pfucb, openNormal, fTrue ) ); Assert( pfucbNil != pfucb ); PIBTraceContextScope tcScope = pfucb->ppib->InitTraceContextScope(); @@ -2357,23 +2355,14 @@ ERR 
ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTIN Assert( PinstFromPpib( ppib )->FRecovering() ); Assert( pfucb->u.pfcb->FInitedForRecovery() ); - pfucb->u.pfcb->AcquireAdditionalInitDuringRecovery(); - // pgnoOE and pgnoAE need to be obtained // - err = ErrSPInitFCB( ppib, fcbRef.get() ); + Call( ErrSPInitFCB( pfucb ) ); pfucb->u.pfcb->Lock(); - if ( err >= JET_errSuccess ) - { - pfucb->u.pfcb->ResetInitedForRecovery(); - } - - pfucb->u.pfcb->ReleaseAdditionalInitDuringRecovery(); + pfucb->u.pfcb->ResetInitedForRecovery(); pfucb->u.pfcb->Unlock(); - Call( err ); } - Assert( pfucb->u.pfcb->FSpaceInitialized() ); Assert( pfucb->u.pfcb->PgnoOE() != pgnoNull ); @@ -2422,39 +2411,14 @@ ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTIN } -LOCAL ERR ErrSPIOpenAndGotoRoot( PIB* ppib, FCB* pfcb, FUCB** ppfucb ) -{ - ERR err; - FUCB* pfucb; - - CallR( ErrBTOpen( ppib, pfcb, &pfucb ) ); - Assert( pfucbNil != pfucb ); - Assert( pfcbNil != pfucb->u.pfcb ); - Assert( pfucb->u.pfcb->FInitialized() ); - - err = ErrBTIGotoRoot( pfucb, latchRIW ); - if ( err < JET_errSuccess ) - { - BTClose( pfucb ); - } - else - { - Assert( latchRIW == Pcsr( pfucb )->Latch() ); - Assert( pcsrNil == pfucb->pcsrRoot ); - pfucb->pcsrRoot = Pcsr( pfucb ); - - *ppfucb = pfucb; - } - - return err; -} - - // Validate I have not unintentionally changed SPACE_HEADER size. C_ASSERT( sizeof(SPACE_HEADER) == 16 ); -LOCAL VOID SPIInitFCB( FCB * pfcb, CSR* pcsr ) +LOCAL VOID SPIInitFCB( FUCB * pfucb, const BOOL fDeferredInit ) { + CSR * pcsr = ( fDeferredInit ? 
pfucb->pcsrRoot : Pcsr( pfucb ) ); + FCB * pfcb = pfucb->u.pfcb; + Assert( pcsr->FLatched() ); // need to acquire FCB lock because that's what protects the Flags @@ -2464,10 +2428,9 @@ LOCAL VOID SPIInitFCB( FCB * pfcb, CSR* pcsr ) { // get external header // - KEYDATAFLAGS kdf; - NDGetExternalHeader ( &kdf, pcsr, noderfSpaceHeader ); - Assert( sizeof( SPACE_HEADER ) == kdf.data.Cb() ); - const SPACE_HEADER * const psph = reinterpret_cast ( kdf.data.Pv() ); + NDGetExternalHeader ( pfucb, pcsr, noderfSpaceHeader ); + Assert( sizeof( SPACE_HEADER ) == pfucb->kdfCurr.data.Cb() ); + const SPACE_HEADER * const psph = reinterpret_cast ( pfucb->kdfCurr.data.Pv() ); if ( psph->FSingleExtent() ) { @@ -2481,10 +2444,13 @@ LOCAL VOID SPIInitFCB( FCB * pfcb, CSR* pcsr ) Assert( pfcb->PgnoAE() == pfcb->PgnoOE() + 1 ); } - if ( pfcb->FInitialized() ) + if ( !fDeferredInit ) { - Assert( !!psph->FNonUnique() == !pfcb->FUnique() ); + Assert( pfcb->FUnique() ); // FCB always initialised as unique + if ( psph->FNonUnique() ) + pfcb->SetNonUnique(); } + Assert( !!psph->FNonUnique() == !pfcb->FUnique() ); pfcb->SetSpaceInitialized(); @@ -2499,21 +2465,22 @@ LOCAL VOID SPIInitFCB( FCB * pfcb, CSR* pcsr ) // initializes FCB with pgnoAE and pgnoOE // -ERR ErrSPInitFCB( _In_ PIB * ppib, _Inout_ FCB * const pfcb ) +ERR ErrSPInitFCB( _Inout_ FUCB * const pfucb ) { ERR err; - CSR csr; + FCB *pfcb = pfucb->u.pfcb; - PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); - tcScope->nParentObjectClass = pfcb->TCE( fTrue ); + PIBTraceContextScope tcScope = pfucb->ppib->InitTraceContextScope(); + tcScope->nParentObjectClass = TceFromFUCB( pfucb ); tcScope->iorReason.SetIort( iortSpace ); tcScope->SetDwEngineObjid( pfcb->ObjidFDP() ); - Expected( !pfcb->FSpaceInitialized() ); + Assert( !Pcsr( pfucb )->FLatched() ); + Assert( !FFUCBSpace( pfucb ) ); // goto root page of tree // - err = csr.ErrGetReadPage( ppib, pfcb->Ifmp(), pfcb->PgnoFDP(), bflfDefault ); + err = ErrBTIGotoRoot( pfucb, 
latchReadTouch ); if ( err < 0 ) { if ( g_fRepair ) @@ -2529,12 +2496,15 @@ ERR ErrSPInitFCB( _In_ PIB * ppib, _Inout_ FCB * const pfcb ) } else { - // objid from fcb and the page header must match + // get objidFDP from root page, FCB can only be set once - Assert( pfcb->ObjidFDP() == csr.Cpage().ObjidFDP() ); - SPIInitFCB( pfcb, &csr ); - csr.ReleasePage(); - err = JET_errSuccess; // clobber warnings + Assert( objidNil == pfcb->ObjidFDP() + || ( PinstFromIfmp( pfucb->ifmp )->FRecovering() && pfcb->ObjidFDP() == Pcsr( pfucb )->Cpage().ObjidFDP() ) ); + pfcb->SetObjidFDP( Pcsr( pfucb )->Cpage().ObjidFDP() ); + + SPIInitFCB( pfucb, fFalse ); + + BTUp( pfucb ); } return err; @@ -2557,14 +2527,18 @@ ERR ErrSPDeferredInitFCB( _Inout_ FUCB * const pfucb ) // goto root page of tree // - CallR( ErrSPIOpenAndGotoRoot( pfucb->ppib, pfcb, &pfucbT ) ); + CallR( ErrBTIOpenAndGotoRoot( + pfucb->ppib, + pfcb->PgnoFDP(), + pfucb->ifmp, + &pfucbT ) ); Assert( pfucbNil != pfucbT ); Assert( pfucbT->u.pfcb == pfcb ); Assert( pcsrNil != pfucbT->pcsrRoot ); if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfcb, pfucbT->pcsrRoot ); + SPIInitFCB( pfucbT, fTrue ); } SPIValidateCpgOwnedAndAvail( pfucbT ); @@ -3334,7 +3308,6 @@ ERR ErrSPCreate( CPG *pcpgAEFDP ) { ERR err; - FCBRef fcbRef; FUCB *pfucb = pfucbNil; const BOOL fUnique = !( fSPFlags & fSPNonUnique ); @@ -3404,9 +3377,11 @@ ERR ErrSPCreate( // table. Implement a fix to allow leaving the FCB in an uninitialized // state, then have it initialized by the subsequent DIR/BTOpen. 
// - Call( ErrFILEIOpenFCB( ppib, ifmp, pgnoFDP, objidNil, openNew, fcbRef ) ); + CallR( ErrBTOpen( ppib, pgnoFDP, ifmp, &pfucb, openNew ) ); + + tcScope->nParentObjectClass = TceFromFUCB( pfucb ); - FCB* pfcb = fcbRef.get(); + FCB *pfcb = pfucb->u.pfcb; Assert( pfcbNil != pfcb ); if ( pgnoSystemRoot == pgnoFDP ) @@ -3436,7 +3411,7 @@ ERR ErrSPCreate( } else { - Call( g_rgfmp[ ifmp ].ErrObjidLastIncrementAndGet( pobjidFDP ) ); + Call( g_rgfmp[ pfucb->ifmp ].ErrObjidLastIncrementAndGet( pobjidFDP ) ); } Assert( pgnoSystemRoot != pgnoFDP || objidSystemRoot == *pobjidFDP ); @@ -3451,9 +3426,6 @@ ERR ErrSPCreate( pfcb->SetSpaceInitialized(); pfcb->Unlock(); - CallR( ErrBTOpen( ppib, pfcb, &pfucb ) ); // can't latch root page, it doesn't exist yet - tcScope->nParentObjectClass = TceFromFUCB( pfucb ); - if ( fSPFlags & fSPMultipleExtent ) { Assert( PgnoFDP( pfucb ) == pgnoFDP ); @@ -3505,7 +3477,6 @@ ERR ErrSPCreate( Assert( !FFUCBVersioned( pfucb ) ); HandleError: - Assert( err != errFCBExists ); // creation of an FCB for a new objid can't conflict Assert( ( pfucb != pfucbNil ) || ( err < JET_errSuccess ) ); if ( pfucb != pfucbNil ) @@ -6075,7 +6046,7 @@ LOCAL ERR ErrSPIGetExt( if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfcb, pfucbSrc->pcsrRoot ); + SPIInitFCB( pfucbSrc, fTrue ); } // @@ -6421,7 +6392,6 @@ ERR ErrSPGetExt( FUCB *pfucbParent = pfucbNil; CPG cpgOEFDP; CPG cpgAEFDP; - FCBRef fcbRef; PIBTraceContextScope tcScope = pfucb->ppib->InitTraceContextScope(); tcScope->nParentObjectClass = TceFromFUCB( pfucb ); @@ -6434,15 +6404,14 @@ ERR ErrSPGetExt( // open cursor on Parent and RIW latch root page // - Call( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoParentFDP, objidNil, fcbRef ) ); - Call( ErrSPIOpenAndGotoRoot( pfucb->ppib, fcbRef.get(), &pfucbParent ) ); + Call( ErrBTIOpenAndGotoRoot( pfucb->ppib, pgnoParentFDP, pfucb->ifmp, &pfucbParent ) ); // allocate an extent // Note: We get back info on OE and AE so we can add the value to the cpg cache // AFTER 
we've released the root. This is because adding a value to the cpg cache // may cause a split in the cpg cache table, and that means we'd need to get space // from the DBRoot. If pgnoParentFDP happens to be systemRoot, that results in - // trying to latch the page twice, one in ErrSPIOpenAndGotoRoot and one several + // trying to latch the page twice, one in ErrBTIOpenAndGotoRoot and one several // levels lower in the callstack. err = ErrSPIGetExt( @@ -7386,7 +7355,7 @@ ERR ErrSPGetPage( if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfcb, pfucb->pcsrRoot ); + SPIInitFCB( pfucb, fTrue ); } // @@ -7510,7 +7479,7 @@ LOCAL ERR ErrSPIFreeSEToParent( // parent must always be in memory // - pfcbParent = FCB::PfcbFCBGet( pfucb->ifmp, pgnoParentFDP, &fcbsf, fTrue ); + pfcbParent = FCB::PfcbFCBGet( pfucb->ifmp, pgnoParentFDP, &fcbsf, fTrue, fTrue ); Assert( pfcbParent != pfcbNil ); Assert( fcbsf & fcbsfInitialized ); Assert( !pfcb->FTypeNull() ); @@ -7566,7 +7535,7 @@ LOCAL ERR ErrSPIFreeSEToParent( // if ( pfucbParentLocal == pfucbNil ) { - Call( ErrSPIOpenAndGotoRoot( pfucb->ppib, pfcbParent, &pfucbParentLocal ) ); + Call( ErrBTIOpenAndGotoRoot( pfucb->ppib, pgnoParentFDP, pfucb->ifmp, &pfucbParentLocal ) ); } else { @@ -8922,7 +8891,7 @@ ERR ErrSPFreeExt( FUCB* const pfucb, const PGNO pgnoFirst, const CPG cpgSize, co if ( !pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfcb, pfucb->pcsrRoot ); + SPIInitFCB( pfucb, fTrue ); } // @@ -9329,12 +9298,10 @@ ERR ErrSPShelvePage( PIB* const ppib, const IFMP ifmp, const PGNO pgno ) Assert( pfmp->FBeyondPgnoShrinkTarget( pgno ) ); Assert( pgno <= pfmp->PgnoLast() ); - FUCB* pfucbRoot = pfucbNil; - FUCB* pfucbAE = pfucbNil; - FCBRef fcbRefSystemRoot; + FUCB* pfucbRoot = pfucbNil; + FUCB* pfucbAE = pfucbNil; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); - Call( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbRoot)); + Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, 
&pfucbRoot ) ); Call( ErrSPIOpenAvailExt( pfucbRoot, &pfucbAE ) ); Call( ErrSPIReserveSPBufPages( pfucbRoot ) ); @@ -9372,12 +9339,10 @@ ERR ErrSPShelvePage( PIB* const ppib, const IFMP ifmp, const PGNO pgno ) //- ERR ErrSPUnshelveShelvedPagesBelowEof( PIB* const ppib, const IFMP ifmp ) { - ERR err = JET_errSuccess; - FMP* const pfmp = g_rgfmp + ifmp; - FUCB* pfucbRoot = pfucbNil; - FCBRef fcbRefSystemRoot; - BOOL fInTransaction = fFalse; - + ERR err = JET_errSuccess; + FMP* const pfmp = g_rgfmp + ifmp; + FUCB* pfucbRoot = pfucbNil; + BOOL fInTransaction = fFalse; Assert( !pfmp->FIsTempDB() ); Expected( pfmp->FShrinkIsRunning() ); @@ -9387,8 +9352,7 @@ ERR ErrSPUnshelveShelvedPagesBelowEof( PIB* const ppib, const IFMP ifmp ) Call( ErrDIRBeginTransaction( ppib, 46018, NO_GRBIT ) ); fInTransaction = fTrue; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); - Call( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbRoot ) ); + Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); Call( ErrSPIUnshelvePagesInRange( pfucbRoot, 1, pfmp->PgnoLast() ) ); @@ -10056,7 +10020,6 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) PGNO pgnoFirstShelved = pgnoNull, pgnoLastShelved = pgnoNull; FUCB* pfucbCatalog = pfucbNil; FUCB* pfucbRoot = pfucbNil; - FCBRef fcbRefRoot; CSparseBitmap spbmOwned; CArray arrShelved; @@ -10074,8 +10037,6 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) Assert( ifmpDummy == ifmp ); fDbOpen = fTrue; - Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefRoot ) ); - // Reclaiming leaked space requires a fully populated MSysObjids table because // that is how we enumerate all the tables efficiently. 
{ @@ -10239,7 +10200,7 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) Call( ErrDIRBeginTransaction( ppib, 37218, NO_GRBIT ) ); fInTransaction = fTrue; - Call( ErrDIROpen( ppib, fcbRefRoot.get(), &pfucbRoot ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); CSPExtentInfo spext; err = ErrSPIFindExtOE( ppib, pfucbRoot->u.pfcb, pgnoFirstToReclaim, &spext ); @@ -10358,7 +10319,7 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) Call( ErrDIRBeginTransaction( ppib, 51150, NO_GRBIT ) ); fInTransaction = fTrue; - Call( ErrDIROpen( ppib, fcbRefRoot.get(), &pfucbRoot ) ); + Call( ErrDIROpen( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); const PGNO pgnoLastToReclaimBelowEof = UlFunctionalMin( pgnoLastToReclaim, pgnoLastInitial ); if ( pgnoFirstToReclaim <= pgnoLastInitial ) { @@ -10421,8 +10382,6 @@ ERR ErrSPReclaimSpaceLeaks( PIB* const ppib, const IFMP ifmp ) pfucbRoot = pfucbNil; } - fcbRefRoot.reset(); - // WARNING: most (if not all) of the above is done without versioning, so there // really isn't any rollback of the update. if ( fInTransaction ) @@ -10902,7 +10861,6 @@ ERR ErrSPFreeFDP( ERR err; const IFMP ifmp = pfcbFDPToFree->Ifmp(); const PGNO pgnoFDPFree = pfcbFDPToFree->PgnoFDP(); - FCBRef fcbRefParent; FUCB *pfucbParent = pfucbNil; FUCB *pfucb = pfucbNil; CPG cpgRootCaptured = 0; @@ -10935,9 +10893,7 @@ ERR ErrSPFreeFDP( Assert( !FFMPIsTempDB( ifmp ) || pgnoSystemRoot == pgnoFDPParent ); - OBJID objidParent = ( pgnoSystemRoot == pgnoFDPParent ? 
objidSystemRoot : objidNil ); - Call( ErrFILEFcbGet( ppib, ifmp, pgnoFDPParent, objidParent, fcbRefParent ) ); - Call( ErrBTOpen( ppib, fcbRefParent.get(), &pfucbParent ) ); + Call( ErrBTOpen( ppib, pgnoFDPParent, ifmp, &pfucbParent ) ); Assert( pfucbNil != pfucbParent ); Assert( pfucbParent->u.pfcb->FInitialized() ); @@ -10983,7 +10939,7 @@ ERR ErrSPFreeFDP( if ( !pfucb->u.pfcb->FSpaceInitialized() ) { - SPIInitFCB( pfucb->u.pfcb, pfucb->pcsrRoot ); + SPIInitFCB( pfucb, fTrue ); } // We expect this to fail to find the FCB in the cache if we're deleting it. @@ -12119,7 +12075,6 @@ ERR ErrSPExtendDB( ERR err; FUCB *pfucbDbRoot = pfucbNil; FUCB *pfucbAE = pfucbNil; - FCBRef fcbRefSystemRoot; PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortSpace ); @@ -12127,8 +12082,7 @@ ERR ErrSPExtendDB( // open cursor on System / DB Root and RIW latch root page // - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); - CallR( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbDbRoot ) ); + CallR( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbDbRoot ) ); tcScope->nParentObjectClass = TceFromFUCB( pfucbDbRoot ); Assert( objidSystemRoot == ObjidFDP( pfucbDbRoot ) ); @@ -12287,7 +12241,6 @@ ERR ErrSPShrinkTruncateLastExtent( FUCB* pfucbRoot = pfucbNil; FUCB* pfucbOE = pfucbNil; FUCB* pfucbAE = pfucbNil; - FCBRef fcbRefSystemRoot; PIBTraceContextScope tcScope = ppib->InitTraceContextScope( ); tcScope->iorReason.SetIort( iortDbShrink ); tcScope->SetDwEngineObjid( objidSystemRoot ); @@ -12310,8 +12263,7 @@ ERR ErrSPShrinkTruncateLastExtent( fInTransaction = fTrue; // Open space trees. 
- Call( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRefSystemRoot ) ); - Call( ErrSPIOpenAndGotoRoot( ppib, fcbRefSystemRoot.get(), &pfucbRoot ) ); + Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) ); Call( ErrSPIOpenOwnExt( pfucbRoot, &pfucbOE ) ); Call( ErrSPIOpenAvailExt( pfucbRoot, &pfucbAE ) ); @@ -13396,9 +13348,10 @@ ERR ErrSPReserveSPBufPages( FUCB* pfucbOwningTree = pfucbNil; // Open FUCB of the owning tree, just in case this is a space FUCB. - Call( ErrSPIOpenAndGotoRoot( + Call( ErrBTIOpenAndGotoRoot( pfucb->ppib, - pfucb->u.pfcb, + pfucb->u.pfcb->PgnoFDP(), + pfucb->ifmp, &pfucbOwningTree ) ); Assert( pfucbOwningTree->u.pfcb == pfucb->u.pfcb ); @@ -13464,7 +13417,6 @@ LOCAL ERR ErrSPIReserveSPBufPages( ERR err = JET_errSuccess; FMP* const pfmp = &g_rgfmp[ pfucb->ifmp ]; FCB* const pfcb = pfucb->u.pfcb; - FCBRef fcbRefParent; FUCB* pfucbParentLocal = pfucbParent; FUCB* pfucbOE = pfucbNil; FUCB* pfucbAE = pfucbNil; @@ -13480,10 +13432,17 @@ LOCAL ERR ErrSPIReserveSPBufPages( Assert( ( pgnoParentFDP != pgnoNull ) || ( pfucbParent == pfucbNil ) ); if ( ( pfucbParentLocal == pfucbNil ) && ( pgnoParentFDP != pgnoNull ) ) { - // Open cursor on parent FDP to get space from. + // Open cursor on parent FDP to get space from. Don't GotoRoot yet, we don't want to be latched + // while calling ErrSPIReserveSPBufPages. // - Call( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoParentFDP, objidNil, fcbRefParent ) ); - Call( ErrBTOpen( pfucb->ppib, fcbRefParent.get(), &pfucbParentLocal ) ); + Call( ErrBTIOpen( + pfucb->ppib, + pfucb->ifmp, + pgnoParentFDP, + objidNil, + openNormal, + &pfucbParentLocal, + fFalse ) ); Assert( pcsrNil == pfucbParentLocal->pcsrRoot ); } @@ -13916,13 +13875,20 @@ LOCAL ERR ErrSPIGetSe( AssertSPIPfucbOnRoot( pfucb ); { - FCBRef fcbRefParent; FUCB *pfucbParentLocal = pfucbNil; - // Open cursor on parent FDP to get space from. + // Open cursor on parent FDP to get space from. 
Don't GotoRoot yet, we don't want to be latched + // while calling ErrSPIReserveSPBufPages, but it can be a time savings to already have an FUCB + // that we can use for multiple calls. // - Call( ErrFILEFcbGet( pfucb->ppib, pfucb->ifmp, pgnoParentFDP, objidNil, fcbRefParent ) ); - Call( ErrBTOpen( pfucb->ppib, fcbRefParent.get(), &pfucbParentLocal ) ); + Call( ErrBTIOpen( + pfucb->ppib, + pfucb->ifmp, + pgnoParentFDP, + objidNil, + openNormal, + &pfucbParentLocal, + fFalse ) ); Assert( pcsrNil == pfucbParentLocal->pcsrRoot ); CallJ( ErrSPIReserveSPBufPages( pfucb, pfucbParentLocal ), CloseParent ); @@ -15579,10 +15545,7 @@ ERR ErrSPGetInfo( Assert( !FSPReachablePages( fSPExtents ) ); if ( pfucbNil == pfucb ) { - FCBRef fcbRef; - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucbT) ); - Assert( fcbRef->FInitialized() ); // an initialized FCB isn't purged when fcbRef goes out of scope + err = ErrBTOpen( ppib, pgnoSystemRoot, ifmp, &pfucbT ); } else { @@ -15629,7 +15592,7 @@ ERR ErrSPGetInfo( { // UNDONE: Are there concurrency issues with updating the FCB // while we only have a read latch? - SPIInitFCB( pfucbT->u.pfcb, pfucbT->pcsrRoot ); + SPIInitFCB( pfucbT, fTrue ); if( !FSPIIsSmall( pfucbT->u.pfcb ) ) { BFPrereadPageRange( pfucbT->ifmp, pfucbT->u.pfcb->PgnoOE(), 2, NULL, NULL, bfprfDefault, ppib->BfpriPriority( pfucbT->ifmp ), *tcScope ); @@ -16341,10 +16304,7 @@ ERR ErrSPGetExtentInfo( if ( pfucbNil == pfucb ) { - FCBRef fcbRef; - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - CallR( ErrBTOpen( ppib, fcbRef.get(), &pfucbT ) ); - Assert( fcbRef->FInitialized() ); // an initialized FCB isn't purged when fcbRef goes out of scope + err = ErrBTOpen( ppib, pgnoSystemRoot, ifmp, &pfucbT ); } else { @@ -16366,7 +16326,7 @@ ERR ErrSPGetExtentInfo( { // UNDONE: Are there cuncurrency issues with updating the FCB // while we only have a read latch? 
- SPIInitFCB( pfucbT->u.pfcb, pfucbT->pcsrRoot ); + SPIInitFCB( pfucbT, fTrue ); if( !FSPIIsSmall( pfucbT->u.pfcb ) ) { BFPrereadPageRange( pfucbT->ifmp, pfucbT->u.pfcb->PgnoOE(), 2, bfprfDefault, ppib->BfpriPriority( pfucbT->ifmp ), *tcScope ); @@ -16755,7 +16715,6 @@ ERR ErrSPTrimRootAvail( CPG cpgAvailExtTotalSparseAfter = 0; FUCB *pfucbT = pfucbNil; FUCB *pfucbAE = pfucbNil; - FCBRef fcbRef; PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); tcScope->iorReason.SetIort( iortSpace ); @@ -16764,15 +16723,14 @@ ERR ErrSPTrimRootAvail( memset( (void*)&spbufOnAE, 0, sizeof(spbufOnAE) ); - CallR( ErrFILEFcbGet( ppib, ifmp, pgnoSystemRoot, objidSystemRoot, fcbRef ) ); - Call( ErrSPIOpenAndGotoRoot( ppib, fcbRef.get(), &pfucbT)); + Call( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbT ) ); AssertSPIPfucbOnRoot( pfucbT ); if ( !pfucbT->u.pfcb->FSpaceInitialized() ) { // UNDONE: Are there cuncurrency issues with updating the FCB // while we only have a read latch? - SPIInitFCB( pfucbT->u.pfcb, pfucbT->pcsrRoot ); + SPIInitFCB( pfucbT, fTrue ); if( !FSPIIsSmall( pfucbT->u.pfcb ) ) { BFPrereadPageRange( pfucbT->ifmp, pfucbT->u.pfcb->PgnoAE(), 2, bfprfDefault, ppib->BfpriPriority( pfucbT->ifmp ), *tcScope ); diff --git a/dev/ese/src/inc/_bt.hxx b/dev/ese/src/inc/_bt.hxx index d9d56845..99427266 100644 --- a/dev/ese/src/inc/_bt.hxx +++ b/dev/ese/src/inc/_bt.hxx @@ -3,6 +3,8 @@ // used only space and BT // +ERR ErrBTIOpenAndGotoRoot( PIB *ppib, const PGNO pgnoFDP, const IFMP ifmp, FUCB **ppfucb ); + ERR ErrBTIIRefresh( FUCB *pfucb, LATCH latch ); // used by recovery diff --git a/dev/ese/src/inc/bt.hxx b/dev/ese/src/inc/bt.hxx index 253265b0..fb831eb2 100644 --- a/dev/ese/src/inc/bt.hxx +++ b/dev/ese/src/inc/bt.hxx @@ -11,6 +11,14 @@ // +enum OPENTYPE +{ + openNormal, // normal open cursor (may be either unique or non-unique btree) + openNormalUnique, // normal open cursor (unique btree only) + openNormalNonUnique, // normal open cursor (non-unique btree 
only) + openNew // open cursor on newly-created FDP +}; + // ************************************** // open/close opearations // @@ -18,6 +26,55 @@ ERR ErrBTOpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb, BOOL fAllowReuse = fTrue ); ERR ErrBTOpenByProxy( PIB *ppib, FCB *pfcb, FUCB **ppfucb, const LEVEL level ); VOID BTClose( FUCB *pfucb ); +ERR ErrBTIOpen( + PIB *ppib, + const IFMP ifmp, + const PGNO pgnoFDP, + const OBJID objidFDP, + const OPENTYPE opentype, + FUCB **ppfucb, + BOOL fWillInitFCB ); + +INLINE ERR ErrBTOpen( + PIB *ppib, + const PGNO pgnoFDP, + const IFMP ifmp, + FUCB **ppfucb, + const OPENTYPE opentype = openNormal, + BOOL fWillInitFCB = fFalse ) +{ + Assert( openNormal == opentype || openNew == opentype ); + return ErrBTIOpen( + ppib, + ifmp, + pgnoFDP, + objidNil, + opentype, + ppfucb, + fWillInitFCB ); +} + +// open cursor, don't touch root page +INLINE ERR ErrBTOpenNoTouch( + PIB *ppib, + const IFMP ifmp, + const PGNO pgnoFDP, + const OBJID objidFDP, + const BOOL fUnique, + FUCB **ppfucb, + BOOL fWillInitFCB = fFalse ) +{ + Assert( objidNil != objidFDP ); + return ErrBTIOpen( + ppib, + ifmp, + pgnoFDP, + objidFDP, + fUnique ? 
openNormalUnique : openNormalNonUnique, + ppfucb, + fWillInitFCB ); +} + // ************************************** // retrieve/release operations diff --git a/dev/ese/src/inc/cat.hxx b/dev/ese/src/inc/cat.hxx index 3a7a4a2d..385f38a9 100644 --- a/dev/ese/src/inc/cat.hxx +++ b/dev/ese/src/inc/cat.hxx @@ -816,9 +816,9 @@ ERR ErrCATGetColumnCallbackInfo( ULONG * const pchDependantColumns ); -ERR ErrCATInitCatalogFCB( PIB* ppib, FCB* pfcb ); -ERR ErrCATInitTempFCB( PIB* ppib, FCB* pfcb ); -ERR ErrCATInitFCB( PIB* ppib, FCB* pfcb, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ); +ERR ErrCATInitCatalogFCB( FUCB *pfucbTable ); +ERR ErrCATInitTempFCB( FUCB *pfucbTable ); +ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLastSetTime ); enum CATCheckIndicesFlags : ULONG // catcif { diff --git a/dev/ese/src/inc/dir.hxx b/dev/ese/src/inc/dir.hxx index cc384285..aa29dd2f 100644 --- a/dev/ese/src/inc/dir.hxx +++ b/dev/ese/src/inc/dir.hxx @@ -120,6 +120,8 @@ ERR ErrDIRCreateDirectory( // ERR ErrDIROpen( PIB *ppib, FCB *pfcb, FUCB **ppfucb ); ERR ErrDIROpenByProxy( PIB *ppib, FCB *pfcb, FUCB **ppfucb, LEVEL level ); +ERR ErrDIROpen( PIB *ppib, PGNO pgnoFDP, IFMP ifmp, FUCB **ppfucb, BOOL fWillInitFCB = fFalse ); +ERR ErrDIROpenNoTouch( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, BOOL fUnique, FUCB **ppfucb, BOOL fWillInitFCB = fFalse ); INLINE VOID DIRInitOpenedCursor( FUCB * const pfucb, const LEVEL level ) { FUCBSetLevelNavigate( pfucb, level ); diff --git a/dev/ese/src/inc/fcb.hxx b/dev/ese/src/inc/fcb.hxx index fa596bc8..8112f276 100644 --- a/dev/ese/src/inc/fcb.hxx +++ b/dev/ese/src/inc/fcb.hxx @@ -883,13 +883,13 @@ private: BOOL FInitialIndex() const; VOID SetInitialIndex(); - BOOL FInitialized() const volatile; + BOOL FInitialized() const; private: VOID SetInitialized_(); VOID ResetInitialized_(); public: - BOOL FInitedForRecovery() const volatile; + BOOL FInitedForRecovery() const; VOID SetInitedForRecovery(); VOID 
ResetInitedForRecovery(); @@ -900,9 +900,9 @@ private: VOID SetVersioningOffForExtentPageCountCache(); VOID ResetVersioningOffForExtentPageCountCache(); - VOID AcquireAdditionalInitDuringRecovery(); - VOID ReleaseAdditionalInitDuringRecovery(); - BOOL FDoingAdditionalInitializationDuringRecovery() const volatile; + BOOL FDoingAdditionalInitializationDuringRecovery() const; + VOID SetDoingAdditionalInitializationDuringRecovery(); + VOID ResetDoingAdditionalInitializationDuringRecovery(); BOOL FInList() const; VOID SetInList(); @@ -932,7 +932,7 @@ private: VOID SetPreread(); VOID ResetPreread(); - BOOL FSpaceInitialized() const volatile; + BOOL FSpaceInitialized() const; VOID SetSpaceInitialized(); VOID ResetSpaceInitialized(); @@ -1023,7 +1023,7 @@ private: VOID ResetUpdatingAndLeaveDML(); VOID ResetUpdating(); - VOID IncrementRefCount( BOOL fOwnWriteLock = fFalse ); + VOID IncrementRefCount(); private: VOID ResetUpdating_(); @@ -1064,7 +1064,7 @@ private: // ===================================================================== // FCB creation/deletion. 
public: - static FCB *PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf = NULL, const BOOL fIncrementRefCount = fTrue ); + static FCB *PfcbFCBGet( const IFMP ifmp, const PGNO pgnoFDP, FCBStateFlags* const pfcbsf = NULL, const BOOL fIncrementRefCount = fTrue, const BOOL fInitForRecovery = fFalse, OBJID* const pobjid = NULL ); static ERR ErrCreate( PIB *ppib, IFMP ifmp, PGNO pgnoFDP, FCB **ppfcb ); VOID CreateComplete_( ERR err, PCSTR szFile, const LONG lLine ); VOID PrepareForPurge( const BOOL fPrepareChildren = fTrue ); @@ -1079,7 +1079,7 @@ private: static BOOL FScanAndPurge_( _In_ INST * pinst, _In_ PIB * ppib, const BOOL fThreshold ); static BOOL FCloseToQuota_( INST * pinst ) { return pinst->m_cresFCB.FCloseToQuota(); }; static VOID PurgeObjects_( INST* const pinst, const IFMP ifmp, const PGNO pgnoFDP, const BOOL fTerminating ); - BOOL FCheckFreeAndPurge_( _In_ const BOOL fThreshold ); + BOOL FCheckFreeAndPurge_( _In_ PIB *ppib, _In_ const BOOL fThreshold ); VOID CloseAllCursorsOnFCB_( const BOOL fTerminating ); VOID Delete_( INST *pinst ); BOOL FHasCallbacks_( INST *pinst ); @@ -1140,7 +1140,7 @@ private: public: VOID InsertHashTable(); VOID DeleteHashTable(); - VOID Release( BOOL fPreventMoveToAvail = fFalse ); + VOID Release(); static BOOL FInHashTable( IFMP ifmp, PGNO pgnoFDB, FCB **ppfcb = NULL ); private: @@ -1449,7 +1449,7 @@ INLINE PGNO FCB::PgnoAE() const { return m_pgnoAE; } INLINE BFLatch* FCB::PBFLatchHintPgnoAE() { Assert( NULL == m_bflPgnoAE.pv ); return &m_bflPgnoAE; } INLINE IFMP FCB::Ifmp() const { return m_ifmp; } INLINE SHORT FCB::CbDensityFree() const { return m_spacehints.m_cbDensityFree; } -INLINE LONG FCB::WRefCount() const { return AtomicRead( const_cast( &m_wRefCount ) ); } // refcount is read without locks on multiple threads for synchronization +INLINE LONG FCB::WRefCount() const { return m_wRefCount; } INLINE RCE *FCB::PrceNewest() const { return m_prceNewest; } INLINE RCE *FCB::PrceOldest() const { return 
m_prceOldest; } INLINE USHORT FCB::CrefDomainDenyRead() const { return m_crefDomainDenyRead; } @@ -1586,7 +1586,7 @@ INLINE VOID FCB::SetDerivedIndex() { Assert( IsLocked() ); AtomicEx INLINE BOOL FCB::FInitialIndex() const { return !!(m_ulFCBFlags & mskFCBInitialIndex ); } INLINE VOID FCB::SetInitialIndex() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitialIndex ); } -INLINE BOOL FCB::FInitialized() const volatile { return !!(m_ulFCBFlags & mskFCBInitialized ); } +INLINE BOOL FCB::FInitialized() const { return !!(m_ulFCBFlags & mskFCBInitialized ); } INLINE VOID FCB::SetInitialized_() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitialized ); } INLINE VOID FCB::ResetInitialized_() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBInitialized ); } @@ -1594,11 +1594,13 @@ INLINE BOOL FCB::FVersioningOffForExtentPageCountCache() const { return !!(m_ulF INLINE VOID FCB::SetVersioningOffForExtentPageCountCache() { AtomicExchangeSet( &m_ulFCBFlags, mskFCBVersioningOff ); } INLINE VOID FCB::ResetVersioningOffForExtentPageCountCache() { AtomicExchangeReset( &m_ulFCBFlags, mskFCBVersioningOff ); } -INLINE BOOL FCB::FInitedForRecovery() const volatile { return !!( m_ulFCBFlags & mskFCBInitedForRecovery ); } -INLINE VOID FCB::SetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitedForRecovery ); } -INLINE VOID FCB::ResetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBInitedForRecovery ); } +INLINE BOOL FCB::FInitedForRecovery() const { return !!(m_ulFCBFlags & mskFCBInitedForRecovery ); } +INLINE VOID FCB::SetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBInitedForRecovery ); } +INLINE VOID FCB::ResetInitedForRecovery() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBInitedForRecovery ); } -INLINE BOOL FCB::FDoingAdditionalInitializationDuringRecovery() const volatile { return !!( 
m_ulFCBFlags & mskFCBDoingAdditionalInitializationDuringRecovery ); } +INLINE BOOL FCB::FDoingAdditionalInitializationDuringRecovery() const { return !!(m_ulFCBFlags & mskFCBDoingAdditionalInitializationDuringRecovery ); } +INLINE VOID FCB::SetDoingAdditionalInitializationDuringRecovery() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBDoingAdditionalInitializationDuringRecovery ); } +INLINE VOID FCB::ResetDoingAdditionalInitializationDuringRecovery() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBDoingAdditionalInitializationDuringRecovery ); } INLINE BOOL FCB::FInList() const { return m_fFCBInList; } INLINE VOID FCB::SetInList() { m_fFCBInList = fTrue; } @@ -1628,9 +1630,9 @@ INLINE BOOL FCB::FPreread() const { return !!(m_ulFCBFlags & mskFC INLINE VOID FCB::SetPreread() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBPreread ); } INLINE VOID FCB::ResetPreread() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBPreread ); } -INLINE BOOL FCB::FSpaceInitialized() const volatile { return !!(m_ulFCBFlags & mskFCBSpaceInitialized ); } -INLINE VOID FCB::SetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBSpaceInitialized ); } -INLINE VOID FCB::ResetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBSpaceInitialized ); } +INLINE BOOL FCB::FSpaceInitialized() const { return !!(m_ulFCBFlags & mskFCBSpaceInitialized ); } +INLINE VOID FCB::SetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBSpaceInitialized ); } +INLINE VOID FCB::ResetSpaceInitialized() { Assert( IsLocked() ); AtomicExchangeReset( &m_ulFCBFlags, mskFCBSpaceInitialized ); } INLINE BOOL FCB::FTryPurgeOnClose() const { return !!(m_ulFCBFlags & mskFCBTryPurgeOnClose ); } INLINE VOID FCB::SetTryPurgeOnClose() { Assert( IsLocked() ); AtomicExchangeSet( &m_ulFCBFlags, mskFCBTryPurgeOnClose ); } @@ -1943,7 +1945,7 @@ INLINE VOID FCB::AssertDDL() // 
========================================================================= // Hashing. -INLINE VOID FCB::Release( BOOL fPreventMoveToAvail /* = fFalse */ ) +INLINE VOID FCB::Release() { #ifdef DEBUG FCB *pfcbT; @@ -1964,7 +1966,7 @@ INLINE VOID FCB::Release( BOOL fPreventMoveToAvail /* = fFalse */ ) Assert( pfcbT == this || ( FDeleteCommitted() && ObjidFDP() != pfcbT->ObjidFDP() ) ); } #endif // DEBUG - DecrementRefCountAndUnlink_( pfucbNil, fTrue, fPreventMoveToAvail ); + DecrementRefCountAndUnlink_( pfucbNil, fTrue ); } INLINE BOOL FCB::FInHashTable( IFMP ifmp, PGNO pgnoFDP, FCB **ppfcb ) diff --git a/dev/ese/src/inc/file.hxx b/dev/ese/src/inc/file.hxx index 670ebcf0..e7a1d5ef 100644 --- a/dev/ese/src/inc/file.hxx +++ b/dev/ese/src/inc/file.hxx @@ -64,39 +64,11 @@ INLINE VOID FILEFreeDefaultRecord( FUCB *pfucbFake ) VOID FILETableMustRollback( PIB *ppib, FCB *pfcbTable ); -// Guard object to help with FCB lifetime. -// Uses unique_ptr with a custom deleter to decrement FCB refcount when the FCB reference goes out of scope. -// Purges (deletes) semi-intialized FCBs. Required because an FCB is only added to the inst's FCB list -// after being fully initialized. -class FCBRefDeleter -{ -public: - void operator() ( FCB* pfcb ); -}; - -// A reference to an FCB. Works with different FCB lifetime states and refcount to ensure proper access to an FCB. 
-using FCBRef = std::unique_ptr; - -enum OPENTYPE -{ - openNormal, // normal open cursor (may be either unique or non-unique btree) - openNormalNoTouch, // normal open cursor, doesn't read pgnoFDP - openNew // open cursor on newly-created FDP -}; - -ERR ErrFILEIOpenFCB( - PIB *ppib, - IFMP ifmp, - PGNO pgnoFDP, - OBJID objidFDP, - OPENTYPE opentype, - FCBRef& fcbRef ); - ERR ErrFILEIInitializeFCB( PIB *ppib, IFMP ifmp, TDB *ptdb, - FCB *pfcbNew, + FCB *ppfcbNew, IDB *pidb, BOOL fPrimary, PGNO pgnoFDP, @@ -107,9 +79,7 @@ VOID FILESetAllIndexMask( FCB *pfcbTable ); ERR ErrFILEDeleteTable( PIB *ppib, IFMP ifmp, const CHAR *szTable, const BOOL fAllowTableDeleteSensitive = fFalse, const JET_GRBIT grbit = NO_GRBIT ); FIELD *PfieldFCBFromColumnName( FCB *pfcb, _In_ PCSTR szColumnName ); - -ERR ErrFILEFcbGet( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ); -ERR ErrFILEFcbGetNoTouch( PIB* ppib, IFMP ifmp, PGNO pgnoFDP, OBJID objidFDP, FCBRef& pfcbRef ); + FCB *PfcbFCBFromIndexName( FCB *pfcbTable, _In_ PCSTR szName ); struct FDPINFO @@ -308,34 +278,3 @@ INLINE VOID FILEReleaseCurrentSecondary( FUCB *pfucb ) } } -INLINE void FCBRefDeleter::operator()( FCB* pfcb ) -{ - if ( pfcb != pfcbNil ) - { - if ( pfcb->FInitialized() ) - { - pfcb->Release( !pfcb->FTypeTable() /* fPreventMoveToAvail */ ); //only table FCBs may move to avail list - } - else - { - // we own the FCB bcause we are in the create path (we're closing because the FCB was created - // but not fully initialized, or because an error - // occurred during FILEOpenTable()) - -#ifdef DEBUG - pfcb->Lock(); - pfcb->FucbList().LockForEnumeration(); - Assert( pfcb->FucbList().Count() == 0 ); - pfcb->FucbList().UnlockForEnumeration(); - pfcb->Unlock(); -#endif - - // synchronously purge the FCB - Assert( pfcb->WRefCount() == 1 ); // we should be the only reference - pfcb->Release( fTrue /* fPreventMoveToAvail */ ); - pfcb->PrepareForPurge(); - pfcb->Purge(); - } - } -} - diff --git 
a/dev/ese/src/inc/fucb.hxx b/dev/ese/src/inc/fucb.hxx index a2f25c4f..ecf3523a 100644 --- a/dev/ese/src/inc/fucb.hxx +++ b/dev/ese/src/inc/fucb.hxx @@ -519,7 +519,6 @@ INLINE BOOL FFUCBSpace( const FUCB *pfucb ) INLINE BOOL FFUCBUnique( const FUCB *pfucb ) { Assert( pfcbNil != pfucb->u.pfcb ); - Assert( pfucb->u.pfcb->FInitialized() ); // unique flag is only valid after full FCB initialization const BOOL fUnique = ( pfucb->u.pfcb->FUnique() || FFUCBSpace( pfucb ) ); diff --git a/dev/ese/src/inc/space.hxx b/dev/ese/src/inc/space.hxx index fc7e1794..04136208 100644 --- a/dev/ese/src/inc/space.hxx +++ b/dev/ese/src/inc/space.hxx @@ -146,7 +146,7 @@ VOID SPTerm(); // initialize Root page // returns pfucb placed on Root // -ERR ErrSPInitFCB( _In_ PIB * ppib, _Inout_ FCB * const pfcb ); +ERR ErrSPInitFCB( _Inout_ FUCB * const pfucb ); ERR ErrSPDeferredInitFCB( _Inout_ FUCB * const pfucb ); ERR ErrSPGetLastPgno( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ PGNO * ppgno ); ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTINFO * pextinfo ); From 8fd004861930ab9276ed2d045aa92896e1f1ba77 Mon Sep 17 00:00:00 2001 From: Umair Ahmad Date: Sat, 5 Nov 2022 21:44:59 +0000 Subject: [PATCH 086/102] Fix some asserts that hit in dbmaintstress. They are two separate but similar issues: 1. Verstore fix is for a case where we have a flag delete RCE after a table delete RCE. This can happen because of verstore cleanup of delta RCEs doing the finalize action on a delta column. The change is just copying the assert at line 5880 for the pfcbLV case. It can also happen on a regular table if it has a delta column with JET_bitColumnDeleteOnZero. 2. The DIR fix is a case where we are opening a proxy cursor during concurrent index create, and the assert isn't happy because the proxy PIB didn't open the database. The proxy PIB is a system PIB used in the execution of a finalize task to do a flag delete operation. We don't call ErrDBOpenDatabase() for any DBTASKs. 
So it is just relaxing this assert for this specific case. [Substrate:26eee8f68957ae38a85cbeea7794441147f7a26d] --- dev/ese/src/ese/dir.cxx | 5 +++++ dev/ese/src/ese/ver.cxx | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dev/ese/src/ese/dir.cxx b/dev/ese/src/ese/dir.cxx index a98b12a4..6900b09b 100644 --- a/dev/ese/src/ese/dir.cxx +++ b/dev/ese/src/ese/dir.cxx @@ -304,9 +304,14 @@ ERR ErrDIROpenByProxy( PIB *ppib, FCB *pfcb, FUCB **ppfucb, LEVEL level ) CheckPIB( ppib ); #ifdef DEBUG + // We may be opening a proxy cursor using a system PIB. + // (e.g. if concurrent index create is processing an RCE created by a DBTASK such as FINALIZETASK). + // We don't call ErrDBOpenDatabase() for any DBTASKs, so the CheckDBID() call below needs to be skipped for that case. + // See VSO# 241238: AssertFail: "FPIBUserOpenedDatabase( ppib, rgfmp[ifmp].Dbid() )" from VER::ErrVERModify INST *pinst = PinstFromPpib( ppib ); if ( !pinst->FRecovering() && pinst->m_fSTInit == fSTInitDone + && !ppib->FSystemCallback() // system PIBs may not call ErrDBOpenDatabase() && !Ptls()->FIsTaskThread() && !Ptls()->fIsRCECleanup ) { diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index 3d53cdee..04013d87 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -5825,10 +5825,10 @@ ERR VER::ErrVERICleanOneRCE( RCE * const prce ) VERIWaitForTasks( this, pfcbT, fFalse, fTrue ); - // bugfix (#45382): May have outstanding moved RCE's + // Processing of delta RCEs may have created flagDelete/writeLock RCEs after operTableDelete. 
Assert( pfcbT->PrceOldest() == prceNil - || ( pfcbT->PrceOldest()->Oper() == operFlagDelete - && pfcbT->PrceOldest()->FMoved() ) ); + || pfcbT->PrceOldest()->Oper() == operFlagDelete + || pfcbT->PrceOldest()->Oper() == operWriteLock ); VERNullifyAllVersionsOnFCB( pfcbT ); pfcbT->PrepareForPurge( fFalse ); From b419d009be88b7f23dfda1367b93bd2552d9e78b Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 8 Nov 2022 03:42:43 +0000 Subject: [PATCH 087/102] ESE Block Cache: Stability: fix deadlock between write back and RegisterIFilePerfAPI During local stress, we hit a case like this: - The IO thread has started several async requests, each of which has a read count on the RWL. - The other thread has opened the file (already open) and is registering an IFilePerfAPI but blocks because of pending IO. - The IO thread starts another request which blocks because the other thread is waiting its turn. - Deadlock. This is similar to an earlier problem we fixed where the thread calling RegisterIFilePerfAPI deadlocked because it had previously requested IO. The fix is for any thread interacting with this registration RWL to detect if they will block and, if so, call ErrIOIssue first to prevent the deadlock. 
[Substrate:6bff8a98a2f947c8a82cf91f748339f5e8862112] --- dev/ese/src/os/blockcache/_filefilter.hxx | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index 0cc11aea..e302b70b 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -2064,7 +2064,16 @@ class TFileFilter // ff CLockDeadlockDetectionInfo::DisableOwnershipTracking(); CLockDeadlockDetectionInfo::DisableDeadlockDetection(); - m_rwlRegisterIFilePerfAPI.EnterAsReader(); + + if ( !m_rwlRegisterIFilePerfAPI.FTryEnterAsReader() ) + { + // ensure any previously requested IO is issued to avoid deadlock with registration + + CallS( ErrIOIssue() ); + + m_rwlRegisterIFilePerfAPI.EnterAsReader(); + } + CLockDeadlockDetectionInfo::EnableDeadlockDetection(); CLockDeadlockDetectionInfo::EnableOwnershipTracking(); } @@ -2645,13 +2654,16 @@ ERR TFileFilter::ErrIOIssue() template< class I > void TFileFilter::RegisterIFilePerfAPI( _In_ IFilePerfAPI* const pfpapi ) { - // ensure any previously requested IO is issued to avoid deadlock during registration + // disallow registration of IFilePerfAPI during any IO request - CallS( ErrIOIssue() ); + if ( !m_rwlRegisterIFilePerfAPI.FTryEnterAsWriter() ) + { + // ensure any previously requested IO is issued to avoid deadlock during registration - // disallow registration of IFilePerfAPI during any IO request + CallS( ErrIOIssue() ); - m_rwlRegisterIFilePerfAPI.EnterAsWriter(); + m_rwlRegisterIFilePerfAPI.EnterAsWriter(); + } // if we already registered an IFilePerfAPI then drop this one, otherwise register it From 23cb6f16f7661ef831a014041dd57fce61681f31 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Tue, 8 Nov 2022 15:35:13 +0000 Subject: [PATCH 088/102] Move cbFree >= cbShrinkage enforce to PageEnforce so we get a page dump in .IRS.RAW [Substrate:8a6c0fcb2ed21d62a09b4cfcbbb84d7ed2c51a47] --- 
dev/ese/src/ese/cpage.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index a18a036e..fc6d5115 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -5755,7 +5755,7 @@ VOID CPAGE::DehydratePageUnadjusted_( _In_ const ULONG cbNewSize ) Enforce( cbShrinkage < CbPage() ); Enforce( cbShrinkage < 0x10000 ); - Enforce( ppghdr->cbFree >= cbShrinkage ); + PageEnforce( (*this), ppghdr->cbFree >= cbShrinkage ); Enforce( ppghdr->ibMicFree < ( m_platchManager->CbBuffer( m_bfl ) - cbShrinkage ) ); Assert( ppghdr->ibMicFree < m_platchManager->CbBuffer( m_bfl ) - CbTagArray_() ); From a9942552d6b69ebcf8fc0b9edf9093410de78fa4 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 8 Nov 2022 16:50:59 +0000 Subject: [PATCH 089/102] ESE: fix ancient problem where we don't EnterDML during schema upgraders Recently we hit an assert where the FCB hash table was expanding and failed an assert that an entry in the table matched its key. This happened during a store.worker schema upgrader. This revealed that we have an ancient issue where if someone is modifying a template table via JET_bitPermitDDL then we should be protecting accesses to the table schema with EnterDML. This is currently skipped for tables with fixed schema like template tables. The fix is to EnterDML for template tables if their schema is not truly fixed. We detect this via the existing FTemplateStatic(). Note that this results in EnterDML being required for updates to template tables which is supported but very rarely used. This change also significantly improves the UnpublishedJetConvertTests. 
[Substrate:a7ac027d4d3439608c08509b7ddfab8faa817d92] --- dev/ese/src/ese/cat.cxx | 3 -- dev/ese/src/ese/fcb.cxx | 4 +-- dev/ese/src/ese/fcreate.cxx | 6 ---- dev/ese/src/inc/fcb.hxx | 57 +++++++++++++++++++++++++++++++------ dev/ese/src/inc/tdb.hxx | 11 ++++--- 5 files changed, 55 insertions(+), 26 deletions(-) diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index 91748af0..ca2c56c5 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -8381,10 +8381,7 @@ ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLa pfcbTemplateTable->GetAPISpaceHints( &jsphTemplate ); // flag the template as static, i.e., it can't be opened for modification anymore - // need the lock because it's a bit-field, so read/write must be atomic - pfcbTemplateTable->Lock(); pfcbTemplateTable->SetTemplateStatic(); - pfcbTemplateTable->Unlock(); // Increment ref count so that template table does not get purged pfcbTemplateTable->IncrementRefCount(); diff --git a/dev/ese/src/ese/fcb.cxx b/dev/ese/src/ese/fcb.cxx index cd866ad5..66f3812b 100644 --- a/dev/ese/src/ese/fcb.cxx +++ b/dev/ese/src/ese/fcb.cxx @@ -2881,10 +2881,8 @@ ERR FCB::ErrSetUpdatingAndEnterDML( PIB *ppib, BOOL fWaitOnConflict ) Assert( IsUnlocked_( LOCK_TYPE::ltShared ) ); // If DDL is fixed, then there's no contention with CreateIndex - if ( !FFixedDDL() ) + if ( FNeedDML_() ) { - Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. 
- Assert( !FTemplateTable() ); Assert( Ptdb() != ptdbNil ); CheckIndexing: diff --git a/dev/ese/src/ese/fcreate.cxx b/dev/ese/src/ese/fcreate.cxx index fada7fb9..851753b2 100644 --- a/dev/ese/src/ese/fcreate.cxx +++ b/dev/ese/src/ese/fcreate.cxx @@ -2736,13 +2736,7 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, if ( pfcbNil != pfcbTemplateTable ) { // flag the template as static, i.e., it can't be opened for modification anymore - // need the lock because it's a bit-field, so read/write must be atomic - // NOTE: Once upon a time, the bit fields needed this lock. Not anymore; the bits - // are set atomically. Leaving the lock in place now just out of worry they have - // some other unintended sync impact. - pfcbTemplateTable->Lock(); pfcbTemplateTable->SetTemplateStatic(); - pfcbTemplateTable->Unlock(); // Increment ref count so that template table does not get purged pfcbTemplateTable->IncrementRefCount(); diff --git a/dev/ese/src/inc/fcb.hxx b/dev/ese/src/inc/fcb.hxx index 8112f276..0515d1f0 100644 --- a/dev/ese/src/inc/fcb.hxx +++ b/dev/ese/src/inc/fcb.hxx @@ -1125,6 +1125,7 @@ private: BOOL FWRefCountOK_(); #endif BOOL FNeedLock_() const; + BOOL FNeedDML_(); VOID EnterDML_(); VOID LeaveDML_(); VOID AssertDML_() const; @@ -1665,8 +1666,24 @@ INLINE BOOL FCB::FTemplateStatic() const { return !!(m_ulFCBFlags & mskFC INLINE VOID FCB::SetTemplateStatic() { Assert( FTemplateTable() ); - Assert( IsLocked() ); - AtomicExchangeSet( &m_ulFCBFlags, mskFCBTemplateStatic ); + + // NOTE: Perform this change while in the DDL lock. this ensures that we don't + // change the state while another thread is between EnterDML and LeaveDML because + // it could change the result of FNeedDML_ which could cause them to fail to release + // the lock + + // NOTE: Once upon a time, the bit fields needed this lock. Not anymore; the bits + // are set atomically. 
Leaving the lock in place now just out of worry they have + // some other unintended sync impact. + + if ( !FTemplateStatic() ) + { + EnterDDL(); + Lock(); + AtomicExchangeSet( &m_ulFCBFlags, mskFCBTemplateStatic ); + Unlock(); + LeaveDDL(); + } } // There is no FCB::ResetTemplateStatic(), since // "Flagging the template as static is currently a one-way trip." @@ -1865,15 +1882,41 @@ INLINE BOOL FCB::FNeedLock_() const } // Enters FCB's critical section for data set/retrieve only if needed. + +INLINE BOOL FCB::FNeedDML_() +{ + // we must always use the DML lock for normal tables + + if ( !FFixedDDL() ) + { + return fTrue; + } + + // these are the only types of tables that should have fixed DDL + + Assert( FTypeTable() || FTypeTemporaryTable() || FTypeSort() ); + + // if this is a template table and the template is not yet confirmed to be + // static schema then we still need to use the DML lock. this covers the + // cases where template table schema is upgraded before the derived tables + // are opened for use + + if ( FTypeTable() && FTemplateTable() && !FTemplateStatic() ) + { + return fTrue; + } + + return fFalse; +} + INLINE VOID FCB::EnterDML() { Assert( FTypeTable() || FTypeTemporaryTable() || FTypeSort() ); Assert( Ptdb() != ptdbNil ); - if ( !FFixedDDL() ) + if ( FNeedDML_() ) { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. - Assert( !FTemplateTable() ); EnterDML_(); } @@ -1886,10 +1929,9 @@ INLINE VOID FCB::LeaveDML() Assert( Ptdb() != ptdbNil ); AssertDML(); - if ( !FFixedDDL() ) + if ( FNeedDML_() ) { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. - Assert( !FTemplateTable() ); LeaveDML_(); } } @@ -1901,10 +1943,9 @@ INLINE VOID FCB::AssertDML() Assert( FTypeTable() || FTypeTemporaryTable() || FTypeSort() ); Assert( Ptdb() != ptdbNil ); - if ( !FFixedDDL() ) + if ( FNeedDML_() ) { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. 
- Assert( !FTemplateTable() ); Assert( IsUnlocked() ); AssertDML_(); } diff --git a/dev/ese/src/inc/tdb.hxx b/dev/ese/src/inc/tdb.hxx index fba05ebd..386203fa 100644 --- a/dev/ese/src/inc/tdb.hxx +++ b/dev/ese/src/inc/tdb.hxx @@ -1568,8 +1568,7 @@ INLINE VOID TDB::SetFLid64( BOOL fLid64 ) INLINE VOID FCB::ResetUpdating_() { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. - Assert( !FTemplateTable() ); - Assert( !FFixedDDL() ); + Assert( FNeedDML_() ); Assert( Ptdb() != ptdbNil ); Ptdb()->LeaveUpdating(); @@ -1577,7 +1576,7 @@ INLINE VOID FCB::ResetUpdating_() INLINE VOID FCB::ResetUpdatingAndLeaveDML() { // If DDL is fixed, then there's no contention with CreateIndex - if ( !FFixedDDL() ) + if ( FNeedDML_() ) { LeaveDML_(); ResetUpdating_(); @@ -1586,7 +1585,7 @@ INLINE VOID FCB::ResetUpdatingAndLeaveDML() INLINE VOID FCB::ResetUpdating() { // If DDL is fixed, then there's no contention with CreateIndex - if ( !FFixedDDL() ) + if ( FNeedDML_() ) { ResetUpdating_(); } @@ -1599,7 +1598,7 @@ INLINE VOID FCB::SetIndexing() Assert( Ptdb() != ptdbNil ); // Can only override FixedDDL flag if we have exclusive use of the table. - Assert( !FFixedDDL() || CrefDomainDenyRead() > 0 ); + Assert( FNeedDML_() || CrefDomainDenyRead() > 0 ); Ptdb()->EnterIndexing(); } @@ -1611,7 +1610,7 @@ INLINE VOID FCB::ResetIndexing() Assert( Ptdb() != ptdbNil ); // Can only override FixedDDL flag if we have exclusive use of the table. - Assert( !FFixedDDL() || CrefDomainDenyRead() > 0 ); + Assert( FNeedDML_() || CrefDomainDenyRead() > 0 ); Ptdb()->LeaveIndexing(); } From b9cbce22a559279744cae3be1586b740dd460313 Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Tue, 8 Nov 2022 21:17:36 +0000 Subject: [PATCH 090/102] ESE Block Cache: Perf: optimize use of Journal Slabs In prod, we are spending a lot of time manipulating journal slabs during the evict process. We are using them to allocate clusters to replace clusters that hold evicted data to support our recovery scheme. 
This process is effectively a FIFO allocation scheme and so it should be cheap. We actually end up using a lot of CPU doing this because we scan the entire slab twice to find the next cluster. In prod, we end up calling ICachedBlockSlab::ErrGetSlotForWrite 16x per database page. This change optimizes this case as follows: - each slab now tracks the count of invalid slots and the lowest invalid slot number - each journal slab now uses the cached lowest invalid slot to directly find the next cluster - ErrEvictOrInvalidateSlot uses the count of invalid slots to determine if it needs to flush to reuse the clusters in a journal slab - ErrEvictOrInvalidateSlot no longer calls ICachedBlockSlab::ErrGetSlotForWrite twice for every operation Other changes: - We now use a CJournalSlabWrapper to manage the ref count on the journal slabs, cleaning up some code. - The alloc of CCachedBlockSlab and CCachedBlockSlabReference and CJournalSlabWrapper are pooled - CHashedLRUKCacheThreadLocalStorage::FOpenSlab and FAnyOpenSlab were changed to use the ibSlab as the key to allow wrappers of ICachedBlockSlab like CJournalSlabWrapper to be used [Substrate:678929bcce24d55713a1ebb161e23f41c2aabc8a] --- dev/ese/published/inc/os/osblockcache.hxx | 4 + .../interop/CCachedBlockSlabWrapper.h | 8 + .../blockcache/interop/CachedBlockSlabBase.h | 8 + .../interop/CachedBlockSlabRemotable.h | 5 + .../blockcache/interop/ICachedBlockSlab.h | 6 + dev/ese/src/os/blockcache/_filefilter.hxx | 5 - .../src/os/blockcache/_hashedlrukcache.hxx | 138 +++++----- .../os/blockcache/_hashedlrukcacheslab.hxx | 254 +++++++++++++++--- .../_hashedlrukcacheslabmanager.hxx | 18 ++ .../_hashedlrukcacheslabwrapper.hxx | 5 + .../_hashedlrukcachethreadlocalstorage.hxx | 8 +- 11 files changed, 352 insertions(+), 107 deletions(-) diff --git a/dev/ese/published/inc/os/osblockcache.hxx b/dev/ese/published/inc/os/osblockcache.hxx index 7765ece9..6be2e853 100644 --- a/dev/ese/published/inc/os/osblockcache.hxx +++ 
b/dev/ese/published/inc/os/osblockcache.hxx @@ -1247,6 +1247,10 @@ class ICachedBlockSlab // cbs virtual BOOL FDirty() = 0; + // Indicates the number of invalid slots in the slab. + + virtual int CInvalidSlot() = 0; + // Callback used to indicate that a slab is saved. typedef void (*PfnSlabSaved)( _In_ const ERR err, diff --git a/dev/ese/src/noncore/blockcache/interop/CCachedBlockSlabWrapper.h b/dev/ese/src/noncore/blockcache/interop/CCachedBlockSlabWrapper.h index e78e3e83..2354495e 100644 --- a/dev/ese/src/noncore/blockcache/interop/CCachedBlockSlabWrapper.h +++ b/dev/ese/src/noncore/blockcache/interop/CCachedBlockSlabWrapper.h @@ -77,6 +77,8 @@ namespace Internal BOOL FDirty() override; + int CInvalidSlot() override; + ERR ErrSave( _In_opt_ const ::ICachedBlockSlab::PfnSlabSaved pfnSlabSaved, _In_opt_ const DWORD_PTR keySlabSaved ) override; }; @@ -420,6 +422,12 @@ namespace Internal return I()->IsDirty() ? fTrue : fFalse; } + template + inline int CCachedBlockSlabWrapper::CInvalidSlot() + { + return I()->InvalidSlotCount(); + } + template inline ERR CCachedBlockSlabWrapper::ErrSave( _In_opt_ const ::ICachedBlockSlab::PfnSlabSaved pfnSlabSaved, _In_opt_ const DWORD_PTR keySlabSaved ) diff --git a/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabBase.h b/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabBase.h index ac8c4012..81df9e73 100644 --- a/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabBase.h +++ b/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabBase.h @@ -66,6 +66,8 @@ namespace Internal virtual bool IsDirty(); + virtual int InvalidSlotCount(); + virtual void Save( ICachedBlockSlab::SlabSaved^ slabSaved ); }; @@ -429,6 +431,12 @@ namespace Internal return Pi->FDirty() ? 
true : false; } + template + inline int CachedBlockSlabBase::InvalidSlotCount() + { + return Pi->CInvalidSlot(); + } + template inline void CachedBlockSlabBase::Save( ICachedBlockSlab::SlabSaved^ slabSaved ) { diff --git a/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabRemotable.h b/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabRemotable.h index 2fac6a6b..9a2c42ba 100644 --- a/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabRemotable.h +++ b/dev/ese/src/noncore/blockcache/interop/CachedBlockSlabRemotable.h @@ -114,6 +114,11 @@ namespace Internal return this->target->IsDirty(); } + virtual int InvalidSlotCount() + { + return this->target->InvalidSlotCount(); + } + virtual void Save( ICachedBlockSlab::SlabSaved^ slabSaved ) { return this->target->Save( slabSaved ); diff --git a/dev/ese/src/noncore/blockcache/interop/ICachedBlockSlab.h b/dev/ese/src/noncore/blockcache/interop/ICachedBlockSlab.h index 67d6b9ea..de69dc12 100644 --- a/dev/ese/src/noncore/blockcache/interop/ICachedBlockSlab.h +++ b/dev/ese/src/noncore/blockcache/interop/ICachedBlockSlab.h @@ -252,6 +252,12 @@ namespace Internal /// True if the slab contains changes that need to be written back to storage via ErrSave. bool IsDirty(); + /// + /// Indicates the number of invalid slots in the slab. + /// + /// The number of invalid slots in the slab. + int InvalidSlotCount(); + /// /// Delegate used to indicate that a slab is saved. 
/// diff --git a/dev/ese/src/os/blockcache/_filefilter.hxx b/dev/ese/src/os/blockcache/_filefilter.hxx index e302b70b..06625e33 100644 --- a/dev/ese/src/os/blockcache/_filefilter.hxx +++ b/dev/ese/src/os/blockcache/_filefilter.hxx @@ -821,11 +821,6 @@ class TFileFilter // ff } } - static void Cleanup() - { - CRequest::Cleanup(); - } - protected: ~CThreadLocalStorage() diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 28db010d..3f967674 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -4688,6 +4688,51 @@ class THashedLRUKCache CReaderWriterLock m_rwlPresenceFilter; }; + // Journal Slab Wrapper + + class CJournalSlabWrapper : public CCachedBlockSlabWrapper + { + public: // specialized API + + CJournalSlabWrapper( _In_ THashedLRUKCache* const pc, + _Inout_ ICachedBlockSlab** const ppcbs, + _Inout_ QWORD* const pibJournalSlabAcquired ) + : CCachedBlockSlabWrapper( ppcbs ), + m_pc( pc ), + m_ibJournalSlabAcquired( *pibJournalSlabAcquired ) + { + *pibJournalSlabAcquired = 0; + } + +#pragma push_macro( "new" ) +#undef new + + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); + } + +#pragma pop_macro( "new" ) + + virtual ~CJournalSlabWrapper() + { + m_pc->ReleaseJournalSlab( m_ibJournalSlabAcquired ); + } + + private: + + THashedLRUKCache* const m_pc; + const QWORD m_ibJournalSlabAcquired; + }; + private: ERR ErrDumpJournalMetadata( _In_ CPRINTF* const pcprintf ); @@ -5083,10 +5128,7 @@ class THashedLRUKCache void UnregisterOpenSlabWait( _In_ CHashedLRUKCacheThreadLocalStorage* const pctls, _In_ const QWORD ibSlab ); ERR ErrGetJournalSlab( _Out_ ICachedBlockSlab** const ppcbs ); - - size_t IcrefJournalSlab( _In_ const QWORD ibSlab ); - void ReferenceJournalSlab( _In_ const QWORD ibSlab ); - 
void ReleaseJournalSlab( _In_ const QWORD ibSlab ); + void ReleaseJournalSlab( _In_ const QWORD ibJournalSlabAcquired ); ERR ErrEvictSlot( _In_ ICachedBlockSlab* pcbs, _In_ const CCachedBlockSlot& slot, @@ -9630,7 +9672,6 @@ ERR THashedLRUKCache::ErrGetSlabInternal( _In_ const QWORD const BOOL fWait = !fForSlabWriteBack; CHashedLRUKCacheThreadLocalStorage* pctls = NULL; ICachedBlockSlabManager* pcbsm = NULL; - BOOL fRelease = fFalse; BOOL fUnregisterWait = fFalse; ERR errSlab = JET_errSuccess; ICachedBlockSlab* pcbs = NULL; @@ -9654,17 +9695,9 @@ ERR THashedLRUKCache::ErrGetSlabInternal( _In_ const QWORD Error( ErrBlockCacheInternalError( "HashedLRUKCacheUnknownSlabType" ) ); } - // if this is a journal slab then note we are acquiring it - - if ( pcbsm == m_pcbsmJournal ) - { - ReferenceJournalSlab( ibSlab ); - fRelease = fTrue; - } - // register as a waiter for the slab - if ( fWait && !fForSlabWriteBack ) + if ( fWait ) { RegisterOpenSlabWait( pctls, ibSlab ); fUnregisterWait = fTrue; @@ -9679,8 +9712,6 @@ ERR THashedLRUKCache::ErrGetSlabInternal( _In_ const QWORD Call( ErrUnexpectedMetadataReadFailure( "GetSlabById", err, ErrERRCheck( JET_errDiskIO ) ) ); } - fRelease = fRelease && !pcbs; - // update our cached block presence filter if necessary if ( pcbsm == m_pcbsmHash && pcbs ) @@ -9720,10 +9751,6 @@ HandleError: { UnregisterOpenSlabWait( pctls, ibSlab ); } - if ( fRelease ) - { - ReleaseJournalSlab( ibSlab ); - } ReleaseSlab( err, &pcbs ); if ( ( fIgnoreVerificationErrors ? 
ErrIgnoreVerificationErrors( err ) : err ) < JET_errSuccess ) { @@ -9751,9 +9778,7 @@ ERR THashedLRUKCache::ErrUnexpectedMetadataReadFailure( _In_ const char* con template void THashedLRUKCache::ReleaseSlab( _In_ const ERR err, _Inout_ ICachedBlockSlab** const ppcbs ) { - ICachedBlockSlab* const pcbs = *ppcbs; - QWORD ibSlab = 0; - BOOL fJournalSlab = fFalse; + ICachedBlockSlab* const pcbs = *ppcbs; *ppcbs = NULL; @@ -9761,11 +9786,6 @@ void THashedLRUKCache::ReleaseSlab( _In_ const ERR err, _Inout_ ICachedBlockS if ( pcbs ) { - // determine if this is a journal slab - - CallS( pcbs->ErrGetPhysicalId( &ibSlab ) ); - fJournalSlab = FJournalSlab( ibSlab ); - // unregister our open slab UnregisterOpenSlab( pcbs ); @@ -9777,13 +9797,6 @@ void THashedLRUKCache::ReleaseSlab( _In_ const ERR err, _Inout_ ICachedBlockS // release the slab delete pcbs; - - // if this was a journal slab then note that it has been released - - if ( fJournalSlab ) - { - ReleaseJournalSlab( ibSlab ); - } } } @@ -10024,14 +10037,16 @@ void THashedLRUKCache::UnregisterOpenSlabWait( _In_ CHashedLRUKCacheThreadLoc template ERR THashedLRUKCache::ErrGetJournalSlab( _Out_ ICachedBlockSlab** const ppcbs ) { - ERR err = JET_errSuccess; - ICachedBlockSlab* pcbs = NULL; + ERR err = JET_errSuccess; + QWORD ibJournalSlabAcquired = 0; + ICachedBlockSlab* pcbs = NULL; + ICachedBlockSlab* pcbsWrapper = NULL; *ppcbs = NULL; // choose a journal slab randomly to provide scalability and to ensure all clusters can be used // - // NOTE: this will acquire an extra ref count on the journal slab that we will release at the end + // NOTE: this will acquire an extra ref count on the journal slab that we must release with the slab UINT uiRandom; const BOOL fSuccess = rand_s( &uiRandom ) == 0; @@ -10039,61 +10054,58 @@ ERR THashedLRUKCache::ErrGetJournalSlab( _Out_ ICachedBlockSlab** const ppcbs const size_t icrefJournalSlabStart = uiRandom % m_ccrefJournalSlab; const QWORD cbSlab = CbChunkPerSlab(); - QWORD ibSlab = 0; for 
( size_t dicrefJournalSlab = 0; dicrefJournalSlab < m_ccrefJournalSlab; dicrefJournalSlab++ ) { const size_t icrefJournalSlab = ( icrefJournalSlabStart + dicrefJournalSlab ) % m_ccrefJournalSlab; if ( AtomicCompareExchange( (LONG*)&m_rgcrefJournalSlab[ icrefJournalSlab ], 0, 1 ) == 0 ) { - ibSlab = m_pch->IbChunkJournal() + icrefJournalSlab * cbSlab; + ibJournalSlabAcquired = m_pch->IbChunkJournal() + icrefJournalSlab * cbSlab; break; } } - if ( ibSlab == 0 ) + if ( ibJournalSlabAcquired == 0 ) { - ibSlab = m_pch->IbChunkJournal() + icrefJournalSlabStart * cbSlab; - ReferenceJournalSlab( ibSlab ); + ibJournalSlabAcquired = m_pch->IbChunkJournal() + icrefJournalSlabStart * cbSlab; + AtomicIncrement( (LONG*)&m_rgcrefJournalSlab[ icrefJournalSlabStart ] ); } // get the chosen journal slab - Call( ErrGetSlab( ibSlab, &pcbs ) ); + Call( ErrGetSlab( ibJournalSlabAcquired, &pcbs ) ); + + // add a wrapper to the journal slab that will allow us to release the journal slab ref count + + Alloc( pcbsWrapper = new CJournalSlabWrapper( this, &pcbs, &ibJournalSlabAcquired ) ); + pcbs = pcbsWrapper; + pcbsWrapper = NULL; // return the journal slab *ppcbs = pcbs; pcbs = NULL; - // release the extra ref count on the journal slab - HandleError: - ReleaseJournalSlab( ibSlab ); + ReleaseSlab( err, &pcbsWrapper ); ReleaseSlab( err, &pcbs ); if ( err < JET_errSuccess ) { ReleaseSlab( err, ppcbs ); } + if ( ibJournalSlabAcquired ) + { + ReleaseJournalSlab( ibJournalSlabAcquired ); + } return err; } template -size_t THashedLRUKCache::IcrefJournalSlab( _In_ const QWORD ibSlab ) -{ - return (size_t)( ( ibSlab - m_pch->IbChunkJournal() ) / CbChunkPerSlab() ); -} - -template -void THashedLRUKCache::ReferenceJournalSlab( _In_ const QWORD ibSlab ) +void THashedLRUKCache::ReleaseJournalSlab( _In_ const QWORD ibJournalSlabAcquired ) { - AtomicIncrement( (LONG*)&m_rgcrefJournalSlab[ IcrefJournalSlab( ibSlab ) ] ); -} + const size_t icrefJournalSlab = (size_t)( ( ibJournalSlabAcquired - 
m_pch->IbChunkJournal() ) / CbChunkPerSlab() ); -template -void THashedLRUKCache::ReleaseJournalSlab( _In_ const QWORD ibSlab ) -{ - AtomicDecrement( (LONG*)&m_rgcrefJournalSlab[ IcrefJournalSlab( ibSlab ) ] ); + AtomicDecrement( (LONG*)&m_rgcrefJournalSlab[ icrefJournalSlab ] ); } template @@ -10126,9 +10138,7 @@ ERR THashedLRUKCache::ErrEvictOrInvalidateSlot( _In_ ICachedBlockSlab* // until the journal is flushed. if there are no slots available then we will flush the journal // and evict all FValid and !FDirty slots and try again - Call( pcbsJournal->ErrGetSlotForWrite( s_cbidInvalid, 0, NULL, &slotJournal ) ); - - if ( !slotJournal.FValid() ) + if ( !pcbsJournal->CInvalidSlot() ) { // flush the journal twice to advance the durable for writeback pointer so that we know it is // safe to reuse the clusters @@ -10142,7 +10152,7 @@ ERR THashedLRUKCache::ErrEvictOrInvalidateSlot( _In_ ICachedBlockSlab* } Call( pcbsJournal->ErrGetSlotForWrite( s_cbidInvalid, 0, NULL, &slotJournal ) ); - Assert( slotJournal.FValid() ); + EnforceSz( slotJournal.FValid(), "HashedLRUKCacheEvictOrInvalidateSlot" ); Assert( slotJournal.FDirty() ); // swap the clusters backing these slots diff --git a/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx b/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx index 2211a74b..e67fc79b 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcacheslab.hxx @@ -141,6 +141,8 @@ class TCachedBlockSlab // cbs BOOL FDirty() override; + int CInvalidSlot() override; + ERR ErrSave( _In_opt_ const ICachedBlockSlab::PfnSlabSaved pfnSlabSaved, _In_opt_ const DWORD_PTR keySlabSaved ) override; @@ -271,6 +273,11 @@ class TCachedBlockSlab // cbs void RestoreUnacceptedUpdates(); void InvalidateCachedRead(); + void TrackInvalidSlots( _In_ const CCachedBlockSlot& slotBefore, + _In_ const CCachedBlockSlot& slotAfter ); + void TrackValidSlot( _In_ const CCachedBlockSlot& slot ); + void TrackInvalidSlot( _In_ const 
CCachedBlockSlot& slot ); + ERR ErrResetChunk( _In_ const size_t icbc ); private: @@ -716,6 +723,9 @@ class TCachedBlockSlab // cbs int m_cUpdate; BOOL* m_rgfDirty; BOOL m_fDirty; + int m_cInvalid; + ChunkNumber m_chnoInvalidHint; + SlotNumber m_slnoInvalidHint; CInvasiveList m_ilUpdate; BOOL* m_rgfSlotSuperceded; @@ -1023,6 +1033,10 @@ INLINE ERR TCachedBlockSlab::ErrUpdateSlot( _In_ const CCachedBlockSlot& slot m_rgcUpdate[ icbc ]++; m_cUpdate++; + // track invalid slots + + TrackInvalidSlots( slotstCurrent, slotNew ); + // update the superceded state m_rgfSlotSuperceded[ (size_t)Islot( icbc, icbl ) ] = fSupercededNew; @@ -1517,6 +1531,15 @@ INLINE BOOL TCachedBlockSlab::FDirty() return m_fDirty; } +template< class I > +INLINE int TCachedBlockSlab::CInvalidSlot() +{ + Assert( m_cInvalid >= 0 ); + Assert( m_cInvalid <= m_ccbc* CCachedBlockChunk::Ccbl() ); + + return m_cInvalid; +} + template< class I > INLINE ERR TCachedBlockSlab::ErrSave( _In_opt_ const ICachedBlockSlab::PfnSlabSaved pfnSlabSaved, _In_opt_ const DWORD_PTR keySlabSaved ) @@ -1620,6 +1643,9 @@ INLINE TCachedBlockSlab::TCachedBlockSlab( _In_ IFileFilter* const m_cUpdate( 0 ), m_rgfDirty( NULL ), m_fDirty( fFalse ), + m_cInvalid( 0 ), + m_chnoInvalidHint( chnoInvalid ), + m_slnoInvalidHint( slnoInvalid ), m_rgfSlotSuperceded( NULL ), m_pfnSlabSaved( NULL ), m_keySlabSaved( NULL ), @@ -1675,35 +1701,39 @@ INLINE ERR TCachedBlockSlab::ErrInit() // if this chunk passed verification then verify each slot in this cached block chunk - if ( m_rgerrChunk[ icbc ] >= JET_errSuccess ) + for ( size_t icbl = 0; icbl < ccbl; icbl++ ) { - for ( size_t icbl = 0; icbl < ccbl; icbl++ ) + CCachedBlock* const pcbl = pcbc->Pcbl( icbl ); + + // if this cached block chunk was uninit then defer init this slot + + if ( fUninit ) { - CCachedBlock* const pcbl = pcbc->Pcbl( icbl ); + new( pcbl ) CCachedBlock( CCachedBlockId( volumeidInvalid, + fileidInvalid, + fileserialInvalid, + cbnoInvalid ), + m_clnoMin + (DWORD)( ( 
m_icbwcBase + icbc ) * ccbl + icbl ), + 0, + tonoInvalid, + tonoInvalid, + fFalse, + fFalse, + fFalse, + fFalse, + fFalse, + updnoInvalid ); + } - // if this cached block chunk was uninit then defer init this slot + // if the chunk is valid then verify the slot - if ( fUninit ) - { - new( pcbl ) CCachedBlock( CCachedBlockId( volumeidInvalid, - fileidInvalid, - fileserialInvalid, - cbnoInvalid ), - m_clnoMin + (DWORD)( ( m_icbwcBase + icbc ) * ccbl + icbl ), - 0, - tonoInvalid, - tonoInvalid, - fFalse, - fFalse, - fFalse, - fFalse, - fFalse, - updnoInvalid ); - } + if ( m_rgerrChunk[ icbc ] >= JET_errSuccess ) + { + CCachedBlockSlot slot( m_ibSlab, Chno( icbc ), Slno( icbl ), *pcbl ); // verify this slot - Call( ErrVerifySlot( CCachedBlockSlot( m_ibSlab, Chno( icbc ), Slno( icbl ), *pcbl ) ) ); + Call( ErrVerifySlot( slot ) ); // determine the max touch number @@ -1711,6 +1741,23 @@ INLINE ERR TCachedBlockSlab::ErrInit() { m_tonoLast = pcbl->Tono0(); } + + // track invalid slots + + if ( !slot.FValid() ) + { + TrackInvalidSlot( slot ); + } + } + + // if the chunk isn't valid then init our stats + + else + { + CCachedBlockSlot slotDefault( m_ibSlab, Chno( icbc ), Slno( icbl ), CCachedBlock() ); + + Assert( !slotDefault.FValid() ); + TrackInvalidSlot( slotDefault ); } } @@ -1962,25 +2009,64 @@ INLINE ERR TCachedBlockSlab::ErrGetSlotForNewImage( _In_ // try to find an empty slot to hold the new image, prefering least recently used slots to ensure we don't leave // slots with very old updnos that could cause problems due to wrap around + // + // if this is a journal slab then use the first invalid slot we find starting from the hint - for ( size_t icbc = 0; icbc < m_ccbc; icbc++ ) + if ( fJournalSlab ) { - CCachedBlockChunk* const pcbc = Pcbc( icbc ); - const size_t ccbl = CCachedBlockChunk::Ccbl(); + const size_t icbcInvalidHint = m_chnoInvalidHint == chnoInvalid ? 0 : (size_t)m_chnoInvalidHint; + const size_t icblInvalidHint = m_slnoInvalidHint == slnoInvalid ? 
0 : (size_t)m_slnoInvalidHint; - for ( size_t icbl = 0; icbl < ccbl; icbl++ ) + for ( size_t iicbc = 0; !fFoundEmpty && iicbc < m_ccbc; iicbc++ ) { - CCachedBlock* const pcbl = pcbc->Pcbl( icbl ); + size_t icbc = icbcInvalidHint + iicbc; + icbc = icbc < m_ccbc ? icbc : icbc - m_ccbc; + CCachedBlockChunk* const pcbc = Pcbc( icbc ); + const size_t ccbl = CCachedBlockChunk::Ccbl(); - if ( !pcbl->FValid() && - ( !fFoundEmpty || - ( tono0Empty != tonoInvalid && pcbl->Tono0() == tonoInvalid ) || - ( tono0Empty != tonoInvalid && tono0Empty > pcbl->Tono0() ) ) ) + for ( size_t iicbl = 0; !fFoundEmpty && iicbl < ccbl; iicbl++ ) { - fFoundEmpty = fTrue; - icbcEmpty = icbc; - icblEmpty = icbl; - tono0Empty = pcbl->Tono0(); + size_t icbl = icblInvalidHint + iicbl; + icbl = icbl < ccbl ? icbl : icbl - ccbl; + CCachedBlock* const pcbl = pcbc->Pcbl( icbl ); + + if ( !pcbl->FValid() ) + { + fFoundEmpty = fTrue; + icbcEmpty = icbc; + icblEmpty = icbl; + } + } + } + + if ( !fFoundEmpty ) + { + Assert( m_cInvalid == 0 ); + m_chnoInvalidHint = chnoInvalid; + m_slnoInvalidHint = slnoInvalid; + } + } + else + { + for ( size_t icbc = 0; icbc < m_ccbc; icbc++ ) + { + CCachedBlockChunk* const pcbc = Pcbc( icbc ); + const size_t ccbl = CCachedBlockChunk::Ccbl(); + + for ( size_t icbl = 0; icbl < ccbl; icbl++ ) + { + CCachedBlock* const pcbl = pcbc->Pcbl( icbl ); + + if ( !pcbl->FValid() && + ( !fFoundEmpty || + ( tono0Empty != tonoInvalid && pcbl->Tono0() == tonoInvalid ) || + ( tono0Empty != tonoInvalid && tono0Empty > pcbl->Tono0() ) ) ) + { + fFoundEmpty = fTrue; + icbcEmpty = icbc; + icblEmpty = icbl; + tono0Empty = pcbl->Tono0(); + } } } } @@ -2751,6 +2837,8 @@ INLINE void TCachedBlockSlab::RevertUnacceptedUpdate( _In_ CUpdate* const pup m_rgcUpdate[ icbc ]--; m_cUpdate--; + TrackInvalidSlots( pupdate->SlotAfter(), pupdate->SlotBefore() ); + if ( fPermanently ) { m_ilUpdate.Remove( pupdate ); @@ -2781,6 +2869,8 @@ INLINE void TCachedBlockSlab::RestoreUnacceptedUpdates() m_rgcUpdate[ 
icbc ]++; m_cUpdate++; + + TrackInvalidSlots( pupdate->SlotBefore(), pupdate->SlotAfter() ); } InvalidateCachedRead(); @@ -2792,6 +2882,80 @@ INLINE void TCachedBlockSlab::InvalidateCachedRead() new( &m_cbidLastRead ) CCachedBlockId(); } +template +INLINE void TCachedBlockSlab::TrackInvalidSlots( _In_ const CCachedBlockSlot& slotBefore, + _In_ const CCachedBlockSlot& slotAfter ) +{ + if ( !slotBefore.FValid() && slotAfter.FValid() ) + { + TrackValidSlot( slotAfter ); + } + else if ( slotBefore.FValid() && !slotAfter.FValid() ) + { + TrackInvalidSlot( slotAfter ); + } +} + +template +INLINE void TCachedBlockSlab::TrackValidSlot( _In_ const CCachedBlockSlot& slot ) +{ + Assert( slot.Chno() != chnoInvalid ); + Assert( slot.Slno() != slnoInvalid ); + + Assert( m_cInvalid > 0 ); + m_cInvalid--; + + Assert( m_chnoInvalidHint != chnoInvalid ); + Assert( m_slnoInvalidHint != slnoInvalid ); + + if ( slot.Chno() == m_chnoInvalidHint && slot.Slno() == m_slnoInvalidHint ) + { + m_slnoInvalidHint = (SlotNumber)( (size_t)m_slnoInvalidHint + 1 ); + if ( m_slnoInvalidHint >= (SlotNumber)CCachedBlockChunk::Ccbl() ) + { + m_slnoInvalidHint = (SlotNumber)0; + m_chnoInvalidHint = (ChunkNumber)( (size_t)m_chnoInvalidHint + 1 ); + if ( m_chnoInvalidHint >= (ChunkNumber)m_ccbc ) + { + m_chnoInvalidHint = (ChunkNumber)0; + } + } + + Assert( m_chnoInvalidHint != chnoInvalid ); + Assert( m_chnoInvalidHint >= (ChunkNumber)0 ); + Assert( m_chnoInvalidHint < (ChunkNumber)m_ccbc ); + + Assert( m_slnoInvalidHint != slnoInvalid ); + Assert( m_slnoInvalidHint >= (SlotNumber)0 ); + Assert( m_slnoInvalidHint < (SlotNumber)CCachedBlockChunk::Ccbl() ); + + Assert( !( slot.Chno() == m_chnoInvalidHint && slot.Slno() == m_slnoInvalidHint ) ); + } +} + +template +INLINE void TCachedBlockSlab::TrackInvalidSlot( _In_ const CCachedBlockSlot& slot ) +{ + Assert( slot.Chno() != chnoInvalid ); + Assert( slot.Slno() != slnoInvalid ); + + Assert( m_cInvalid < m_ccbc * CCachedBlockChunk::Ccbl() ); + m_cInvalid++; 
+ + Assert( ( m_chnoInvalidHint == chnoInvalid ) == ( m_slnoInvalidHint == slnoInvalid ) ); + + if ( ( m_chnoInvalidHint == chnoInvalid && m_slnoInvalidHint == slnoInvalid ) || + slot.Chno() < m_chnoInvalidHint || + ( slot.Chno() == m_chnoInvalidHint && slot.Slno() < m_slnoInvalidHint ) ) + { + m_chnoInvalidHint = slot.Chno(); + m_slnoInvalidHint = slot.Slno(); + } + + Assert( m_chnoInvalidHint != chnoInvalid ); + Assert( m_slnoInvalidHint != slnoInvalid ); +} + template INLINE ERR TCachedBlockSlab::ErrResetChunk( _In_ const size_t icbc ) { @@ -3161,6 +3325,24 @@ class CCachedBlockSlab // cbsm return err; } +#pragma push_macro( "new" ) +#undef new + + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); + } + +#pragma pop_macro( "new" ) + private: CCachedBlockSlab( _In_ IFileFilter* const pff, @@ -3171,7 +3353,7 @@ class CCachedBlockSlab // cbsm _In_ const QWORD icbwcBase, _In_ const ClusterNumber clnoMin, _In_ const ClusterNumber clnoMax ) - : TCachedBlockSlab( pff, ibSlab, cbSlab, ccbc, pcbwcm, icbwcBase, clnoMin, clnoMax ) + : TCachedBlockSlab( pff, ibSlab, cbSlab, ccbc, pcbwcm, icbwcBase, clnoMin, clnoMax ) { } }; diff --git a/dev/ese/src/os/blockcache/_hashedlrukcacheslabmanager.hxx b/dev/ese/src/os/blockcache/_hashedlrukcacheslabmanager.hxx index e2a774d8..3f0aa4dc 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcacheslabmanager.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcacheslabmanager.hxx @@ -626,6 +626,24 @@ class CCachedBlockSlabReference : public CCachedBlockSlabWrapper m_pcbsm->RemoveSlabReference( &m_pste ); } +#pragma push_macro( "new" ) +#undef new + + using CPool = TPool; + + void* operator new( _In_ const size_t cb ) + { + return CPool::PvAllocate(); + } + + void operator delete( _In_opt_ void* const pv ) + { + void* pvT = pv; + CPool::Free( &pvT ); + } + +#pragma pop_macro( "new" ) + private: 
TCachedBlockSlabManager* const m_pcbsm; diff --git a/dev/ese/src/os/blockcache/_hashedlrukcacheslabwrapper.hxx b/dev/ese/src/os/blockcache/_hashedlrukcacheslabwrapper.hxx index f21ed04b..51a85262 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcacheslabwrapper.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcacheslabwrapper.hxx @@ -140,6 +140,11 @@ class TCachedBlockSlabWrapper // fw return m_piInner->FDirty(); } + int CInvalidSlot() override + { + return m_piInner->CInvalidSlot(); + } + ERR ErrSave( _In_opt_ const ICachedBlockSlab::PfnSlabSaved pfnSlabSaved, _In_opt_ const DWORD_PTR keySlabSaved ) override { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx index ff156626..d4f84a3c 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachethreadlocalstorage.hxx @@ -113,9 +113,13 @@ class CHashedLRUKCacheThreadLocalStorage // ctls { if ( pcbs ) { + QWORD ibSlab = 0; + CallS( pcbs->ErrGetPhysicalId( &ibSlab ) ); + Assert( ibSlab ); + for ( size_t iibSlab = 0; iibSlab < s_cibSlab; iibSlab++ ) { - if ( pcbs == m_rgpcbsSlab[ iibSlab ] ) + if ( m_rgibSlab[ iibSlab ] == ibSlab ) { return fTrue; } @@ -129,7 +133,7 @@ class CHashedLRUKCacheThreadLocalStorage // ctls { for ( size_t iibSlab = 0; iibSlab < s_cibSlab; iibSlab++ ) { - if ( m_rgpcbsSlab[ iibSlab ] ) + if ( m_rgibSlab[ iibSlab ] ) { return fTrue; } From 2b96fc32345553a164b2ce4982a4571db5e50200 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Wed, 9 Nov 2022 07:30:46 +0000 Subject: [PATCH 091/102] Rolled back RCEs can appear out of order because Rollback does not always nullify RCEs in order [Substrate:e5e3c04eab25511212fa09aa1836d6a2e5011842] --- dev/ese/src/ese/ver.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index 04013d87..916e1808 100644 --- a/dev/ese/src/ese/ver.cxx +++ 
b/dev/ese/src/ese/ver.cxx @@ -1234,7 +1234,7 @@ INLINE VOID RCE::SetPrcePrevOfNode( RCE * prce ) default: { - Assert( fFalse ); + Assert( m_fRolledBack ); } } } From 84ad4e382ff4719a35d7f6a0e950a8d8b0a2f377 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Wed, 9 Nov 2022 07:56:56 +0000 Subject: [PATCH 092/102] JetDupSession2 to copy trxBegin0 for readonly transactions [Substrate:0d40336f41a9bb00df2c23df38982f23242d5987] --- dev/ese/published/inc/jethdr.w | 12 +++++++ dev/ese/src/ese/dir.cxx | 2 +- dev/ese/src/ese/esent.def | 3 ++ dev/ese/src/ese/jetapi.cxx | 43 ++++++++++++++++++++++--- dev/ese/src/ese/pib.cxx | 37 ++++++++++++++++++++++ dev/ese/src/inc/pib.hxx | 47 +++++++++++++++++++--------- dev/ese/src/noncore/interop/grbits.h | 1 + 7 files changed, 125 insertions(+), 20 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index bcb174e1..b6a9e759 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -4444,6 +4444,11 @@ typedef struct // end_PubEsent #define JET_bitForceSessionClosed 0x00000001 + + /* Flags for JetDupSession2 */ + +#define JET_bitDupReadOnlySnapshot 0x00000001 // Duplicate the transaction snapshot point (including transaction context) for readonly transaction. + // The new session is returned in level 1 read-only transaction. 
// begin_PubEsent /* Flags for JetAttachDatabase/JetOpenDatabase */ @@ -7299,6 +7304,13 @@ JetEndSession( #pragma endregion // end_PubEsent + +JET_ERR JET_API +JetDupSession2( + _In_ JET_SESID sesid, + _In_ JET_GRBIT grbit, + _Out_ JET_SESID * psesid ); + #if ( JET_VERSION >= 0x0600 ) #pragma region Desktop Family diff --git a/dev/ese/src/ese/dir.cxx b/dev/ese/src/ese/dir.cxx index 6900b09b..5578125b 100644 --- a/dev/ese/src/ese/dir.cxx +++ b/dev/ese/src/ese/dir.cxx @@ -1861,7 +1861,7 @@ ERR ErrDIRBeginTransaction( PIB *ppib, const TRXID trxid, const JET_GRBIT grbit else ppib->ResetFReadOnlyTrx(); - PIBSetTrxBegin0( ppib ); + ppib->PIBSetTrxBegin0(); } else if( prceNil != ppib->prceNewest ) { diff --git a/dev/ese/src/ese/esent.def b/dev/ese/src/ese/esent.def index 16afedb5..242d95f4 100644 --- a/dev/ese/src/ese/esent.def +++ b/dev/ese/src/ese/esent.def @@ -322,6 +322,9 @@ EXPORTS ALIAS1(JetDupSession,8) ALIAS2(JetDupSession,8) + ALIAS1(JetDupSession2,12) + ALIAS2(JetDupSession2,12) + ALIAS1(JetEnableMultiInstanceA,12) ALIAS2(JetEnableMultiInstanceA,12) ALIAS1(JetEnableMultiInstanceW,12) diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index bf47b797..754e588d 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -12619,7 +12619,7 @@ JET_ERR JET_API JetBeginSessionW( JET_TRY( opBeginSession, JetBeginSessionExW( instance, psesid, wszUserName, wszPassword ) ); } -LOCAL JET_ERR JetDupSessionEx( _In_ JET_SESID sesid, _Out_ JET_SESID *psesid ) +LOCAL JET_ERR JetDupSessionEx( _In_ JET_SESID sesid, _In_ JET_GRBIT grbit, _Out_ JET_SESID *psesid ) { APICALL_SESID apicall( opDupSession ); @@ -12633,17 +12633,50 @@ LOCAL JET_ERR JetDupSessionEx( _In_ JET_SESID sesid, _Out_ JET_SESID *psesid ) if ( apicall.FEnter( sesid ) ) { - apicall.LeaveAfterCall( ErrIsamBeginSession( - (JET_INSTANCE)PinstFromSesid( sesid ), - psesid ) ); + ERR err; + PIB *ppib = (PIB *)sesid; + if ( grbit & ~JET_bitDupReadOnlySnapshot ) + { + err = ErrERRCheck( 
JET_errInvalidGrbit ); + } + else if ( grbit == JET_bitDupReadOnlySnapshot && ppib->Level() == 0 ) + { + err = ErrERRCheck( JET_errNotInTransaction ); + } + else if ( grbit == JET_bitDupReadOnlySnapshot && ( !ppib->FReadOnlyTrx() || ppib->Level() != 1 ) ) + { + err = ErrERRCheck( JET_errIllegalOperation ); + } + else + { + err = ErrIsamBeginSession( (JET_INSTANCE)PinstFromSesid( sesid ), psesid ); + if ( err >= JET_errSuccess && grbit == JET_bitDupReadOnlySnapshot ) + { + err = ((PIB *)*psesid)->ErrDupReadOnlyTransaction( ppib ); + if ( err < 0 ) + { + CallS( ErrIsamEndSession( *psesid, 0 ) ); + *psesid = JET_sesidNil; + } + } + } + + apicall.LeaveAfterCall( err ); } return apicall.ErrResult(); } + JET_ERR JET_API JetDupSession( _In_ JET_SESID sesid, _Out_ JET_SESID *psesid ) { JET_VALIDATE_SESID( sesid ); - JET_TRY( opDupSession, JetDupSessionEx( sesid, psesid ) ); + JET_TRY( opDupSession, JetDupSessionEx( sesid, 0, psesid ) ); +} + +JET_ERR JET_API JetDupSession2( _In_ JET_SESID sesid, _In_ JET_GRBIT grbit, _Out_ JET_SESID *psesid ) +{ + JET_VALIDATE_SESID( sesid ); + JET_TRY( opDupSession, JetDupSessionEx( sesid, grbit, psesid ) ); } /*================================================================= diff --git a/dev/ese/src/ese/pib.cxx b/dev/ese/src/ese/pib.cxx index 7422cb69..8a3a6b9e 100644 --- a/dev/ese/src/ese/pib.cxx +++ b/dev/ese/src/ese/pib.cxx @@ -355,6 +355,10 @@ void PIB::DecrementLevel() m_trxidstack.Pop(); --m_level; Assert(m_level >= 0); + if ( m_level == 0 ) + { + m_fDupedTransaction = fFalse; + } } // ================================================================ @@ -973,6 +977,39 @@ ERR VTAPI ErrIsamResetSessionContext( JET_SESID sesid ) return JET_errSuccess; } +ERR PIB::ErrDupReadOnlyTransaction( PIB *ppibCopyFrom ) +{ + ERR err = JET_errSuccess; + Assert( ppibCopyFrom->FReadOnlyTrx() ); + Assert( ppibCopyFrom->Level() == 1 ); + Assert( Level() == 0 ); + + // Copy all the user context, like commit-context/cache-priority/tracing-context + 
CallR( ErrSetClientCommitContextGeneric( ppibCopyFrom->PvClientCommitContextGeneric(), ppibCopyFrom->CbClientCommitContextGeneric() ) ); + m_fCommitContextContainsCustomerData = ppibCopyFrom->m_fCommitContextContainsCustomerData; + m_fCommitContextNeedPreCommitCallback = ppibCopyFrom->m_fCommitContextNeedPreCommitCallback; + m_pctCachePriority = ppibCopyFrom->m_pctCachePriority; + m_grbitUserIoPriority = ppibCopyFrom->m_grbitUserIoPriority; + m_qosIoPriority = ppibCopyFrom->m_qosIoPriority; + m_utc.DeepCopy( ppibCopyFrom->m_utc ); + static_assert( JET_sesparamCommitContextNeedPreCommitCallback /* last known */ + 1 == JET_sesparamMaxValueInvalid, "Please make sure you add duping of new sesparam here and update assert" ); + + // Now set session context so the new session can be moved if needed to another thread + CallS( ErrPIBSetSessionContext( ppibCopyFrom->dwTrxContext ) ); + PIBSetTrxContext(); + + SetFReadOnlyTrx(); + + PIBSetTrxBegin0( ppibCopyFrom ); + + CallS( ErrLGBeginTransaction( this ) ); + VERBeginTransaction( this, ppibCopyFrom->m_trxidstack.Peek0() ); + + m_fDupedTransaction = fTrue; + + return JET_errSuccess; +} + VOID PIBReportSessionSharingViolation( const PIB * const ppib ) { WCHAR wszSession[32]; diff --git a/dev/ese/src/inc/pib.hxx b/dev/ese/src/inc/pib.hxx index 6ddfbdee..2b12a677 100644 --- a/dev/ese/src/inc/pib.hxx +++ b/dev/ese/src/inc/pib.hxx @@ -252,7 +252,6 @@ public: private: DWORD_PTR dwTrxContext; // default is thread id. - friend VOID PIBSetTrxBegin0( PIB * const ppib ); LGPOS lgposTrxBegin0; // temporary, only for debugging // flags @@ -281,6 +280,7 @@ private: FLAG32 m_fDBScan:1; // session is for DBSCAN FLAG32 m_fLeakReport:1; // session is for the leak report FLAG32 m_fEnforceOptionallyUniqueIndices:1;// session is enforcing uniqueness on optionally unique indices. 
+ FLAG32 m_fDupedTransaction:1; // transaction on session is duped from another session #if defined(DEBUG) || defined(EXPENSIVE_INLINE_EXTENT_PAGE_COUNT_CACHE_VALIDATION) FLAG32 m_fUpdatingExtentPageCountCache:1; // session is currently updating the cached CPG values in the catalog #endif @@ -583,6 +583,9 @@ public: UserTraceContext* Putc() { return &m_utc; } const UserTraceContext* Putc() const { return &m_utc; } + VOID PIBSetTrxBegin0( PIB * const ppibCopyFrom ); + ERR ErrDupReadOnlyTransaction( PIB *ppib ); + #ifdef DEBUGGER_EXTENSION VOID DumpOpenTrxUserDetails( _In_ CPRINTF * const pcprintf, _In_ const DWORD_PTR dwOffset = 0, _In_ LONG lgenTip = 0 ) const; VOID DumpBasic( CPRINTF * pcprintf, DWORD_PTR dwOffset = 0 ) const; @@ -1252,44 +1255,60 @@ INLINE VOID PIBSetLevelRollback( PIB *ppib, LEVEL levelT ) #define TRXID_INCR 4 // ================================================================ -INLINE VOID PIBSetTrxBegin0( PIB * const ppib ) +INLINE VOID PIB::PIBSetTrxBegin0( PIB * const ppibCopyFrom = NULL ) // ================================================================ // // Used when a transaction starts from level 0 or refreshes // //- { - INST* const pinst = PinstFromPpib( ppib ); + INST* const pinst = PinstFromPpib( this ); INST::PLS* const ppls = pinst->Ppls(); ppls->m_rwlPIBTrxOldest.EnterAsWriter(); - if ( ppib->FReadOnlyTrx() ) + if ( ppibCopyFrom ) + { + Assert( ppibCopyFrom->Level() > 0 ); + Assert( ppibCopyFrom->FReadOnlyTrx() ); + Assert( FReadOnlyTrx() ); + + trxBegin0 = ppibCopyFrom->trxBegin0; + } + else if ( FReadOnlyTrx() ) { - ppib->trxBegin0 = pinst->m_trxNewest + TRXID_INCR/2; + trxBegin0 = pinst->m_trxNewest + TRXID_INCR/2; } else { - ppib->trxBegin0 = TRX( AtomicExchangeAdd( (LONG *)&pinst->m_trxNewest, TRXID_INCR ) ) + TRXID_INCR; + trxBegin0 = TRX( AtomicExchangeAdd( (LONG *)&pinst->m_trxNewest, TRXID_INCR ) ) + TRXID_INCR; } // collect lgpos for debugging purpose - ppib->lgposTrxBegin0 = pinst->m_plog->LgposLGLogTipNoLock(); + 
lgposTrxBegin0 = pinst->m_plog->LgposLGLogTipNoLock(); - ppib->m_pplsTrxOldest = ppls; + m_pplsTrxOldest = ppls; #ifdef DEBUG // This trxBegin0 better not be older than the trxBegin0 of the first session on the invasive list or // TrxOldest calculation will be busted PIB* const ppibTrxOldest = ppls->m_ilTrxOldest.PrevMost(); - Assert ( !ppibTrxOldest || ( INT( ppibTrxOldest->trxBegin0 - ppib->trxBegin0 ) <= 0 ) ); + Assert ( !ppibTrxOldest || ( INT( ppibTrxOldest->trxBegin0 - trxBegin0 ) <= 0 ) ); #endif - // Oldest transaction can only change if this is the first transaction - if ( ppls->m_ilTrxOldest.PrevMost() == NULL ) + // Insert in trxBegin0 order in the TrxOldest CInvasiveList + if ( ppibCopyFrom ) { - pinst->SetTrxOldestCachedMayBeStale(); + ppls->m_ilTrxOldest.Insert( this, ppibCopyFrom ); + } + else + { + // Oldest transaction can only change if this is the first transaction + if ( ppls->m_ilTrxOldest.PrevMost() == NULL ) + { + pinst->SetTrxOldestCachedMayBeStale(); + } + ppls->m_ilTrxOldest.InsertAsNextMost( this ); } - ppls->m_ilTrxOldest.InsertAsNextMost( ppib ); ppls->m_rwlPIBTrxOldest.LeaveAsWriter(); - ppib->trxCommit0 = trxMax; + trxCommit0 = trxMax; } // ================================================================ diff --git a/dev/ese/src/noncore/interop/grbits.h b/dev/ese/src/noncore/interop/grbits.h index 3314d270..0a4f9590 100644 --- a/dev/ese/src/noncore/interop/grbits.h +++ b/dev/ese/src/noncore/interop/grbits.h @@ -137,6 +137,7 @@ MSINTERNAL enum class MJET_GRBIT IdleAvailBuffersStatus = 0x00000020, // Returns JET_wrnIdleFull when database cache available buffers is less than the JET_paramStartFlushThreshold setting. IdleWaitForAsyncActivity = 0x00000040, // Waits for all async activity to quiesce. Returns JET_wrnRemainingVersions if there are still pending version store buckets. 
ForceSessionClosed = 0x00000001, + DupReadOnlySnapshot = 0x00000001, // Duplicate the transaction snapshot point (including transaction context) for readonly transaction. DbReadOnly = 0x00000001, DbExclusive = 0x00000002, // multiple opens allowed DbSingleExclusive = 0x00000002, // NOT CURRENTLY IMPLEMENTED - currently maps to JET_bitDbExclusive From 8579ba8ff6ea0d00605bc0485ce90c66592d1b62 Mon Sep 17 00:00:00 2001 From: Build Team Date: Wed, 9 Nov 2022 18:28:45 +0000 Subject: [PATCH 093/102] Revert: Change is causing test failures. This change had prev passed an ESE full test pass. The reverted PR age: 25.63 hours old. [Substrate:f67bf12757b9af6ecb1901f0efdc901242515ba2] --- dev/ese/src/ese/cat.cxx | 3 ++ dev/ese/src/ese/fcb.cxx | 4 ++- dev/ese/src/ese/fcreate.cxx | 6 ++++ dev/ese/src/inc/fcb.hxx | 57 ++++++------------------------------- dev/ese/src/inc/tdb.hxx | 11 +++---- 5 files changed, 26 insertions(+), 55 deletions(-) diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index ca2c56c5..91748af0 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -8381,7 +8381,10 @@ ERR ErrCATInitFCB( FUCB *pfucbTable, OBJID objidTable, const BOOL fSkipPgnoFDPLa pfcbTemplateTable->GetAPISpaceHints( &jsphTemplate ); // flag the template as static, i.e., it can't be opened for modification anymore + // need the lock because it's a bit-field, so read/write must be atomic + pfcbTemplateTable->Lock(); pfcbTemplateTable->SetTemplateStatic(); + pfcbTemplateTable->Unlock(); // Increment ref count so that template table does not get purged pfcbTemplateTable->IncrementRefCount(); diff --git a/dev/ese/src/ese/fcb.cxx b/dev/ese/src/ese/fcb.cxx index 66f3812b..cd866ad5 100644 --- a/dev/ese/src/ese/fcb.cxx +++ b/dev/ese/src/ese/fcb.cxx @@ -2881,8 +2881,10 @@ ERR FCB::ErrSetUpdatingAndEnterDML( PIB *ppib, BOOL fWaitOnConflict ) Assert( IsUnlocked_( LOCK_TYPE::ltShared ) ); // If DDL is fixed, then there's no contention with CreateIndex - if ( FNeedDML_() ) + if 
( !FFixedDDL() ) { + Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. + Assert( !FTemplateTable() ); Assert( Ptdb() != ptdbNil ); CheckIndexing: diff --git a/dev/ese/src/ese/fcreate.cxx b/dev/ese/src/ese/fcreate.cxx index 851753b2..fada7fb9 100644 --- a/dev/ese/src/ese/fcreate.cxx +++ b/dev/ese/src/ese/fcreate.cxx @@ -2736,7 +2736,13 @@ ERR ErrFILECreateTable( PIB *ppib, IFMP ifmp, JET_TABLECREATE5_A *ptablecreate, if ( pfcbNil != pfcbTemplateTable ) { // flag the template as static, i.e., it can't be opened for modification anymore + // need the lock because it's a bit-field, so read/write must be atomic + // NOTE: Once upon a time, the bit fields needed this lock. Not anymore; the bits + // are set atomically. Leaving the lock in place now just out of worry they have + // some other unintended sync impact. + pfcbTemplateTable->Lock(); pfcbTemplateTable->SetTemplateStatic(); + pfcbTemplateTable->Unlock(); // Increment ref count so that template table does not get purged pfcbTemplateTable->IncrementRefCount(); diff --git a/dev/ese/src/inc/fcb.hxx b/dev/ese/src/inc/fcb.hxx index 0515d1f0..8112f276 100644 --- a/dev/ese/src/inc/fcb.hxx +++ b/dev/ese/src/inc/fcb.hxx @@ -1125,7 +1125,6 @@ private: BOOL FWRefCountOK_(); #endif BOOL FNeedLock_() const; - BOOL FNeedDML_(); VOID EnterDML_(); VOID LeaveDML_(); VOID AssertDML_() const; @@ -1666,24 +1665,8 @@ INLINE BOOL FCB::FTemplateStatic() const { return !!(m_ulFCBFlags & mskFC INLINE VOID FCB::SetTemplateStatic() { Assert( FTemplateTable() ); - - // NOTE: Perform this change while in the DDL lock. this ensures that we don't - // change the state while another thread is between EnterDML and LeaveDML because - // it could change the result of FNeedDML_ which could cause them to fail to release - // the lock - - // NOTE: Once upon a time, the bit fields needed this lock. Not anymore; the bits - // are set atomically. 
Leaving the lock in place now just out of worry they have - // some other unintended sync impact. - - if ( !FTemplateStatic() ) - { - EnterDDL(); - Lock(); - AtomicExchangeSet( &m_ulFCBFlags, mskFCBTemplateStatic ); - Unlock(); - LeaveDDL(); - } + Assert( IsLocked() ); + AtomicExchangeSet( &m_ulFCBFlags, mskFCBTemplateStatic ); } // There is no FCB::ResetTemplateStatic(), since // "Flagging the template as static is currently a one-way trip." @@ -1882,41 +1865,15 @@ INLINE BOOL FCB::FNeedLock_() const } // Enters FCB's critical section for data set/retrieve only if needed. - -INLINE BOOL FCB::FNeedDML_() -{ - // we must always use the DML lock for normal tables - - if ( !FFixedDDL() ) - { - return fTrue; - } - - // these are the only types of tables that should have fixed DDL - - Assert( FTypeTable() || FTypeTemporaryTable() || FTypeSort() ); - - // if this is a template table and the template is not yet confirmed to be - // static schema then we still need to use the DML lock. this covers the - // cases where template table schema is upgraded before the derived tables - // are opened for use - - if ( FTypeTable() && FTemplateTable() && !FTemplateStatic() ) - { - return fTrue; - } - - return fFalse; -} - INLINE VOID FCB::EnterDML() { Assert( FTypeTable() || FTypeTemporaryTable() || FTypeSort() ); Assert( Ptdb() != ptdbNil ); - if ( FNeedDML_() ) + if ( !FFixedDDL() ) { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. + Assert( !FTemplateTable() ); EnterDML_(); } @@ -1929,9 +1886,10 @@ INLINE VOID FCB::LeaveDML() Assert( Ptdb() != ptdbNil ); AssertDML(); - if ( FNeedDML_() ) + if ( !FFixedDDL() ) { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. 
+ Assert( !FTemplateTable() ); LeaveDML_(); } } @@ -1943,9 +1901,10 @@ INLINE VOID FCB::AssertDML() Assert( FTypeTable() || FTypeTemporaryTable() || FTypeSort() ); Assert( Ptdb() != ptdbNil ); - if ( FNeedDML_() ) + if ( !FFixedDDL() ) { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. + Assert( !FTemplateTable() ); Assert( IsUnlocked() ); AssertDML_(); } diff --git a/dev/ese/src/inc/tdb.hxx b/dev/ese/src/inc/tdb.hxx index 386203fa..fba05ebd 100644 --- a/dev/ese/src/inc/tdb.hxx +++ b/dev/ese/src/inc/tdb.hxx @@ -1568,7 +1568,8 @@ INLINE VOID TDB::SetFLid64( BOOL fLid64 ) INLINE VOID FCB::ResetUpdating_() { Assert( FTypeTable() ); // Sorts and temp tables have fixed DDL. - Assert( FNeedDML_() ); + Assert( !FTemplateTable() ); + Assert( !FFixedDDL() ); Assert( Ptdb() != ptdbNil ); Ptdb()->LeaveUpdating(); @@ -1576,7 +1577,7 @@ INLINE VOID FCB::ResetUpdating_() INLINE VOID FCB::ResetUpdatingAndLeaveDML() { // If DDL is fixed, then there's no contention with CreateIndex - if ( FNeedDML_() ) + if ( !FFixedDDL() ) { LeaveDML_(); ResetUpdating_(); @@ -1585,7 +1586,7 @@ INLINE VOID FCB::ResetUpdatingAndLeaveDML() INLINE VOID FCB::ResetUpdating() { // If DDL is fixed, then there's no contention with CreateIndex - if ( FNeedDML_() ) + if ( !FFixedDDL() ) { ResetUpdating_(); } @@ -1598,7 +1599,7 @@ INLINE VOID FCB::SetIndexing() Assert( Ptdb() != ptdbNil ); // Can only override FixedDDL flag if we have exclusive use of the table. - Assert( FNeedDML_() || CrefDomainDenyRead() > 0 ); + Assert( !FFixedDDL() || CrefDomainDenyRead() > 0 ); Ptdb()->EnterIndexing(); } @@ -1610,7 +1611,7 @@ INLINE VOID FCB::ResetIndexing() Assert( Ptdb() != ptdbNil ); // Can only override FixedDDL flag if we have exclusive use of the table. 
- Assert( FNeedDML_() || CrefDomainDenyRead() > 0 ); + Assert( !FFixedDDL() || CrefDomainDenyRead() > 0 ); Ptdb()->LeaveIndexing(); } From 311627bbc6ba1c0a5a38ec36e49c80061e6bc416 Mon Sep 17 00:00:00 2001 From: Nathanael Cheriere Date: Thu, 10 Nov 2022 08:48:02 +0000 Subject: [PATCH 094/102] ESE: Removing EBC tracing calls. Deactivated the tracing of EBC events that were using the same ETW events as the BFResMgr. This is required in order to use the sub sampling of the cache traces: EBC produces significantly more events than a subsampled BfResMgr, which produces large trace files, defeating the point of subsampling. Subsampling could be added to EBC tracing later. [Substrate:55439d4abe0f32d6b53c2b1dd0930e6f7b0b559c] --- dev/ese/src/os/blockcache/_cachetelemetry.hxx | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dev/ese/src/os/blockcache/_cachetelemetry.hxx b/dev/ese/src/os/blockcache/_cachetelemetry.hxx index 8b307faa..4510260e 100644 --- a/dev/ese/src/os/blockcache/_cachetelemetry.hxx +++ b/dev/ese/src/os/blockcache/_cachetelemetry.hxx @@ -41,12 +41,17 @@ class TCacheTelemetry // ctm BFRequestTraceFlags BfrtfReference( _In_ const BOOL fRead, _In_ const BOOL fCacheIfPossible ); }; +#ifdef DEBUG +#define ENABLE_BFEBC_TRACING +#endif + template< class I > void TCacheTelemetry::Miss( _In_ const ICacheTelemetry::FileNumber filenumber, _In_ const ICacheTelemetry::BlockNumber blocknumber, _In_ const BOOL fRead, _In_ const BOOL fCacheIfPossible ) { +#ifdef ENABLE_BFEBC_TRACING if ( filenumber == filenumberInvalid ) { return; @@ -62,6 +67,7 @@ void TCacheTelemetry::Miss( _In_ const ICacheTelemetry::FileNumber filenumb 100, BfrtfReference( fRead, fCacheIfPossible ), getutc->context.nClientType ); +#endif } template< class I > @@ -70,6 +76,7 @@ void TCacheTelemetry::Hit( _In_ const ICacheTelemetry::FileNumber filenumb _In_ const BOOL fRead, _In_ const BOOL fCacheIfPossible ) { +#ifdef ENABLE_BFEBC_TRACING if ( filenumber == filenumberInvalid ) { return; 
@@ -87,12 +94,14 @@ void TCacheTelemetry::Hit( _In_ const ICacheTelemetry::FileNumber filenumb 100, BfrtfReference( fRead, fCacheIfPossible ), getutc->context.nClientType ); +#endif } template< class I > void TCacheTelemetry::Update( _In_ const ICacheTelemetry::FileNumber filenumber, _In_ const ICacheTelemetry::BlockNumber blocknumber ) { +#ifdef ENABLE_BFEBC_TRACING if ( filenumber == filenumberInvalid ) { return; @@ -120,6 +129,7 @@ void TCacheTelemetry::Update( _In_ const ICacheTelemetry::FileNumber file petc->iorReason.Ioru(), petc->iorReason.Iorf(), petc->nParentObjectClass ); +#endif } template< class I > @@ -127,6 +137,7 @@ void TCacheTelemetry::Write( _In_ const ICacheTelemetry::FileNumber filenumb _In_ const ICacheTelemetry::BlockNumber blocknumber, _In_ const BOOL fReplacementPolicy ) { +#ifdef ENABLE_BFEBC_TRACING if ( filenumber == filenumberInvalid ) { return; @@ -153,6 +164,7 @@ void TCacheTelemetry::Write( _In_ const ICacheTelemetry::FileNumber filenumb petc->iorReason.Ioru(), petc->iorReason.Iorf(), petc->nParentObjectClass ); +#endif } template< class I > @@ -160,6 +172,7 @@ void TCacheTelemetry::Evict( _In_ const ICacheTelemetry::FileNumber filenumb _In_ const ICacheTelemetry::BlockNumber blocknumber, _In_ const BOOL fReplacementPolicy ) { +#ifdef ENABLE_BFEBC_TRACING if ( filenumber == filenumberInvalid ) { return; @@ -172,6 +185,7 @@ void TCacheTelemetry::Evict( _In_ const ICacheTelemetry::FileNumber filenumb JET_errSuccess, fReplacementPolicy ? bfefReasonAvailPool : bfefReasonPurgePage, 100 ); +#endif } template< class I > From 0ba0df186af8c9eecafcd9b7b1526838a0fc41c7 Mon Sep 17 00:00:00 2001 From: TAW Date: Thu, 10 Nov 2022 19:17:50 +0000 Subject: [PATCH 095/102] Add casts and some #defines preliminary to running GCR2 When GCR2 is run and we collapse all "long"s to "int"s, we end up needing some casts to make the OS happy. Making a small number of those casts here. They're NO-OPs in the current code. 
Also, first example of the #define foolery we need to make DWORD work. DWORD in Windows is based on long. If I was being harsh, I'd remove all usages of DWORD from our code. But, DWORD is too deep in our psyches to do that. So, GCR2 will redefine DWORD to be based on "int" and then we have to play a #define game to be able to include Windows headers without type redefinition errors. Far in the future I see some reorganization of the #include order so that windows specific headers are included in far fewer places. [Substrate:4463f3c49fff90ba1ded53f1ba56886ea5cb6ad8] --- dev/ese/published/inc/os/error.hxx | 35 ------------------------------ dev/ese/published/inc/os/math.hxx | 2 +- dev/ese/published/inc/sync.hxx | 6 ++--- dev/ese/src/inc/esestd.hxx | 5 ----- dev/ese/src/sync/sync.cxx | 18 ++++++++------- 5 files changed, 14 insertions(+), 52 deletions(-) diff --git a/dev/ese/published/inc/os/error.hxx b/dev/ese/published/inc/os/error.hxx index 0437a6cf..ee00debf 100644 --- a/dev/ese/published/inc/os/error.hxx +++ b/dev/ese/published/inc/os/error.hxx @@ -369,41 +369,6 @@ extern void (__stdcall *g_pfnEnforceContextFail)( const WCHAR* wszContext, const #define EnforceSz( exp, szTag ) EnforceAtSz( exp, szTag, __FILE__, __LINE__ ) -// ------------------------------------------------------------------------------------------------ -// -// Compiler Assert -// - -// -// C_ASSERT() can be used to perform many compile-time assertions: -// type sizes, field offsets, etc. -// -// An assertion failure results in -// error C2118: negative subscript. 
-// If you have not defined a static / compile-time constraint results in -// error C3861: 'countof': identifier not found -// error C2086: 'char __C_ASSERT__[1]' : redefinition -// -// Copied over from winnt.h - -#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1] -// This C_ASSERT() is not sufficient for a check like this: -// C_ASSERT( dtickMaintCacheSizeRequest <= dtickMaintCacheStatsPeriod / 2 ); -// b/c (at the time) dtickMaintCacheSizeRequest = 0, and so the error -// 2>e:\src\win8\esent2\ds\esent\src\ese\bf.cxx(11397) : error C4296: '<=' : expression is always true -// is returned. It is complaining that "0 <= x" ... no value of x can ever -// make this false ... but since 0 can change (b/c it's a constant that may -// be altered) this is actually a valid C_ASSERT(). - -// Doing a better C_ASSERT() ... -#define S_ASSERT(e) \ - __pragma(warning(push)) \ - __pragma(warning(disable:4296)) \ - typedef char __C_ASSERT__[(e)?1:-1] \ - __pragma(warning(pop)) - - - // ------------------------------------------------------------------------------------------------ // // Exceptions diff --git a/dev/ese/published/inc/os/math.hxx b/dev/ese/published/inc/os/math.hxx index 9b2e9d54..f3d8ad89 100644 --- a/dev/ese/published/inc/os/math.hxx +++ b/dev/ese/published/inc/os/math.hxx @@ -38,7 +38,7 @@ inline ULONG Log2( ULONG x ) inline ULONG Log2( unsigned __int64 x ) { ULONG log2; - BYTE ret = _BitScanReverse64( &log2, x ); + BYTE ret = _BitScanReverse64( ( OS_WIN_ULONG * )&log2, x ); return ret > 0 ? 
log2 : -1; // log( 0 ) is undefined, represented by -1 } diff --git a/dev/ese/published/inc/sync.hxx b/dev/ese/published/inc/sync.hxx index 92ddb5fa..b3d8161b 100644 --- a/dev/ese/published/inc/sync.hxx +++ b/dev/ese/published/inc/sync.hxx @@ -344,7 +344,7 @@ inline LONG AtomicExchange( LONG* const plTarget, const LONG lValue ) { OSSYNCAssert( IsAtomicallyModifiable( plTarget ) ); - return _InterlockedExchange( plTarget, lValue ); + return _InterlockedExchange( ( OS_WIN_LONG * )plTarget, lValue ); } // atomically sets the target to the specified value, returning the target's @@ -437,7 +437,7 @@ inline LONG AtomicExchangeAdd( LONG * const plTarget, const LONG lValue ) { OSSYNCAssert( IsAtomicallyModifiable( plTarget ) ); - return _InterlockedExchangeAdd( plTarget, lValue ); + return _InterlockedExchangeAdd( ( OS_WIN_LONG * )plTarget, lValue ); } // atomically adds the specified value to the target, returning the target's @@ -460,7 +460,7 @@ inline LONG AtomicCompareExchange( LONG * const plTarget, const LONG lInitial, c { OSSYNCAssert( IsAtomicallyModifiable( plTarget ) ); - return _InterlockedCompareExchange( plTarget, lFinal, lInitial ); + return _InterlockedCompareExchange( ( OS_WIN_LONG * )plTarget, lFinal, lInitial ); } inline ULONG AtomicCompareExchange( ULONG * const pulTarget, const ULONG ulInitial, const ULONG ulFinal ) diff --git a/dev/ese/src/inc/esestd.hxx b/dev/ese/src/inc/esestd.hxx index ddb94878..92b009c7 100644 --- a/dev/ese/src/inc/esestd.hxx +++ b/dev/ese/src/inc/esestd.hxx @@ -17,11 +17,6 @@ #include -typedef __nullterminated char* PSTR; -typedef __nullterminated const char* PCSTR; -typedef __nullterminated wchar_t* PWSTR; -typedef __nullterminated const wchar_t* PCWSTR; - #include #include #include diff --git a/dev/ese/src/sync/sync.cxx b/dev/ese/src/sync/sync.cxx index 0cb8400b..a1b733f1 100644 --- a/dev/ese/src/sync/sync.cxx +++ b/dev/ese/src/sync/sync.cxx @@ -2883,7 +2883,9 @@ CSXWLatch::~CSXWLatch() // errors. 
+#define DWORD OS_WIN_DWORD #include +#undef DWORD #ifndef ESENT // @@ -3210,7 +3212,7 @@ BOOL FOSSyncIClsRegister( _CLS* pcls ) DWORD dwExitCode; if ( pclsClean->hContext && - GetExitCodeThread( pclsClean->hContext, &dwExitCode ) && + GetExitCodeThread( pclsClean->hContext, ( OS_WIN_DWORD * )&dwExitCode ) && dwExitCode != STILL_ACTIVE ) { // detach this CLS @@ -4437,7 +4439,7 @@ void __cdecl CFPrintF::operator()( const char* szFormat, ... ) SetFilePointerEx( HANDLE( m_hFile ), ibOffset, NULL, FILE_END ); DWORD cbWritten; - WriteFile( HANDLE( m_hFile ), szBuf, DWORD( strlen( szBuf ) * sizeof( char ) ), &cbWritten, NULL ); + WriteFile( HANDLE( m_hFile ), szBuf, DWORD( strlen( szBuf ) * sizeof( char ) ), ( OS_WIN_DWORD * )&cbWritten, NULL ); ReleaseMutex( HANDLE( m_hMutex ) ); } @@ -4589,7 +4591,7 @@ GetExpression( DEBUG_VALUE FullValue; ULONG64 Address = 0; - hr = g_DebugControl->Evaluate( szExpression, DEBUG_VALUE_INT64, &FullValue, &EndIdx ); + hr = g_DebugControl->Evaluate( szExpression, DEBUG_VALUE_INT64, &FullValue, ( OS_WIN_ULONG * )&EndIdx ); if ( SUCCEEDED( hr ) ) { Address = FullValue.I64; @@ -4611,7 +4613,7 @@ FEDBGMemoryRead( // ================================================================ { HRESULT hr; - hr = g_DebugDataSpaces->ReadVirtual( ulAddressInDebuggee, pbBuffer, cbBuffer, pcbRead ); + hr = g_DebugDataSpaces->ReadVirtual( ulAddressInDebuggee, pbBuffer, cbBuffer, ( OS_WIN_ULONG * )pcbRead ); return SUCCEEDED( hr ); } @@ -4655,7 +4657,7 @@ LOCAL BOOL FAddressFromGlobal( const char* const szGlobal, T** const ppt ) DEBUG_VALUE FullValue; ULONG64 Address = 0; - hr = g_DebugControl-> Evaluate( szGlobal, DEBUG_VALUE_INT64, &FullValue, &EndIdx); + hr = g_DebugControl->Evaluate( szGlobal, DEBUG_VALUE_INT64, &FullValue, ( OS_WIN_ULONG * )&EndIdx); if ( SUCCEEDED( hr ) ) { @@ -4683,7 +4685,7 @@ LOCAL BOOL FGlobalFromAddress( T* const pt, __out_bcount(cbMax) PSTR szGlobal, c ulAddress, szGlobal, (ULONG) cbMax, - &cbActual, + ( OS_WIN_ULONG * )&cbActual, 
&dwOffset ); @@ -6228,12 +6230,12 @@ static BOOL FOSSyncIInit() BOOL fResult; fResult = GetLogicalProcessorInformationEx( RelationGroup, pBuffer, - &BufferSize ); + ( OS_WIN_DWORD * )&BufferSize ); OSSYNCAssert( !fResult && GetLastError() == ERROR_INSUFFICIENT_BUFFER ); pBuffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)_alloca( BufferSize ); fResult = GetLogicalProcessorInformationEx( RelationGroup, pBuffer, - &BufferSize ); + ( OS_WIN_DWORD * )&BufferSize ); OSSYNCAssert( fResult ); g_cProcessorGroups = pBuffer->Group.ActiveGroupCount; g_cProcessorsPerGroup = 1; From 7bf3f5f31b89d321475d5f841e2ad23ac23497eb Mon Sep 17 00:00:00 2001 From: Andrew Goodsell Date: Thu, 10 Nov 2022 21:57:24 +0000 Subject: [PATCH 096/102] ESE Block Cache: Perf: Combine slab updates for evict and cache Currently, EBC is written such that if a sync evict is needed to cache something then that evict is processed and journaled separately. The subsequent update to cache something is its own journal entry. This is obviously not optimal because we end up processing and journaling the same slab twice per update. This change reorganizes the code slightly so that these updates are combined. Now, instead of seeing one journal entry evicting an entry and another one caching an entry in its place, this will be combined into one journal entry where it looks like we just replace one entry with another. To aid in analysis and debugging, we will also emit the before image of any evicted slot in the combined journal entry. This slot will be applied to the target slab but will be immediately overwritten with the subsequently added after image. 
This impacts several key code paths that analyzed the changes in a slab on update: - CUpdateSlabVisitor verifies that the cluster is replaced on any evict/invalidate as this is required to support our recovery scheme - CUpdateSlabVisitor collects slots that are involved in the update - CCachedBlockPresenceFilter::FUpdateSlot updates the presence filter based on evict/cache as required to correctly identify items that may reside in the cache - LogCacheUpdateJournalEntry => TraceCacheUpdateJournalEntry emits a trace on every cache update at do or redo time - ErrDumpJournalEntry dumps each entry in the journal These code paths were updated to use more nuanced checks to still allow the detection of an evict/cache even if it happened to the same slot in one atomic operation. Some other fixes were made: - CCachedBlock::Updno() was exposed to be public because it is used by the new logic above to detect an evict/cache in the same slot using the tuple VolumeId, FileId, FileSerial, CachedBlockNumber, and UpdateNumber to uniquely identify a version of a cached file block - We defined an updnoFirst and explicitly prevent it from being used because there is logic in CCachedBlockPresenceFilter that relies on this value uniquely identifying the first update of an item [Substrate:fc1a83b964e76297b144b8b011919a1bed0e285c] --- dev/ese/published/inc/os/osblockcache.hxx | 8 +- .../noncore/blockcache/interop/CachedBlock.h | 19 +- .../noncore/blockcache/interop/UpdateNumber.h | 5 + .../src/os/blockcache/_hashedlrukcache.hxx | 524 ++++++++++-------- 4 files changed, 334 insertions(+), 222 deletions(-) diff --git a/dev/ese/published/inc/os/osblockcache.hxx b/dev/ese/published/inc/os/osblockcache.hxx index 6be2e853..137a627d 100644 --- a/dev/ese/published/inc/os/osblockcache.hxx +++ b/dev/ese/published/inc/os/osblockcache.hxx @@ -759,13 +759,15 @@ INLINE BOOL operator>=( _In_ const TouchNumber tonoA, _In_ const TouchNumber ton enum class UpdateNumber : USHORT // updno { updnoInvalid = 0, 
+ updnoFirst = 1, updnoMax = 65535, }; constexpr UpdateNumber updnoInvalid = UpdateNumber::updnoInvalid; +constexpr UpdateNumber updnoFirst = UpdateNumber::updnoFirst; constexpr UpdateNumber updnoMax = UpdateNumber::updnoMax; -INLINE UpdateNumber operator+( _In_ const UpdateNumber updno, _In_ const LONG i ) { return ( (LONG)updno + i > (LONG)updnoMax ) ? (UpdateNumber)( (LONG)updnoInvalid + 1 ) : (UpdateNumber)( (LONG)updno + i ); } +INLINE UpdateNumber operator+( _In_ const UpdateNumber updno, _In_ const LONG i ) { return ( (LONG)updno + i > (LONG)updnoMax ) ? (UpdateNumber)( (LONG)updnoFirst + 1 ) : (UpdateNumber)( (LONG)updno + i ); } INLINE int CmpUpdno( _In_ const UpdateNumber updnoA, _In_ const UpdateNumber updnoB ) { return (SHORT)( (USHORT)updnoA - (USHORT)updnoB ); } INLINE BOOL operator<( _In_ const UpdateNumber updnoA, _In_ const UpdateNumber updnoB ) { return CmpUpdno( updnoA, updnoB ) < 0; } INLINE BOOL operator<=( _In_ const UpdateNumber updnoA, _In_ const UpdateNumber updnoB ) { return CmpUpdno( updnoA, updnoB ) <= 0; } @@ -811,6 +813,7 @@ class CCachedBlock // cbl BOOL FDirty() const { return m_fDirty != 0; } BOOL FEverDirty() const { return m_fEverDirty != 0; } BOOL FPurged() const { return m_fPurged != 0; } + UpdateNumber Updno() const { return m_le_updno; } protected: @@ -848,7 +851,6 @@ class CCachedBlock // cbl DWORD DwECC() const { return m_le_dwECC; } TouchNumber Tono0() const { return m_le_rgtono[ 0 ]; } TouchNumber Tono1() const { return m_le_rgtono[ 1 ]; } - UpdateNumber Updno() const { return m_le_updno; } BYTE RgbitReserved0() const { return m_rgbitReserved0; } @@ -1022,7 +1024,7 @@ class CCachedBlockSlotState : public CCachedBlockSlot BOOL FSlotUpdated() const { return m_fSlotUpdated; } BOOL FClusterUpdated() const { return m_fClusterUpdated; } BOOL FSuperceded() const { return m_fSuperceded; } - BOOL FFirstUpdate() const { return Updno() == (UpdateNumber)1; } + BOOL FFirstUpdate() const { return Updno() == updnoFirst; } static void 
Dump( _In_ const CCachedBlockSlotState& slotst, _In_ CPRINTF* const pcprintf, diff --git a/dev/ese/src/noncore/blockcache/interop/CachedBlock.h b/dev/ese/src/noncore/blockcache/interop/CachedBlock.h index be1b9a1f..bc6c36f9 100644 --- a/dev/ese/src/noncore/blockcache/interop/CachedBlock.h +++ b/dev/ese/src/noncore/blockcache/interop/CachedBlock.h @@ -148,6 +148,17 @@ namespace Internal } } + /// + /// Update Number. + /// + property Internal::Ese::BlockCache::Interop::UpdateNumber UpdateNumber + { + Internal::Ese::BlockCache::Interop::UpdateNumber get() + { + return (Internal::Ese::BlockCache::Interop::UpdateNumber)this->pcbl->Updno(); + } + } + /// static bool operator==( CachedBlock^ a, CachedBlock^ b ) { @@ -182,7 +193,8 @@ namespace Internal ^ this->IsPinned.GetHashCode() ^ this->IsDirty.GetHashCode() ^ this->WasEverDirty.GetHashCode() - ^ this->WasPurged.GetHashCode(); + ^ this->WasPurged.GetHashCode() + ^ this->UpdateNumber.GetHashCode(); } /// @@ -223,6 +235,11 @@ namespace Internal return false; } + if ( this->UpdateNumber != other->UpdateNumber ) + { + return false; + } + return true; } diff --git a/dev/ese/src/noncore/blockcache/interop/UpdateNumber.h b/dev/ese/src/noncore/blockcache/interop/UpdateNumber.h index 6bf6bf34..63e72207 100644 --- a/dev/ese/src/noncore/blockcache/interop/UpdateNumber.h +++ b/dev/ese/src/noncore/blockcache/interop/UpdateNumber.h @@ -25,6 +25,11 @@ namespace Internal ///
Invalid = (UInt16)::updnoInvalid, + /// + /// The first update. + /// + First = (UInt16)::updnoFirst, + /// /// The maximum UpdateNumber. /// diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index 3f967674..b3f89ec4 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -801,8 +801,9 @@ class THashedLRUKCache _In_ const CCachedBlockSlotState& slotstAccepted, _In_ const CCachedBlockSlotState& slotstCurrent ) { - ERR err = JET_errSuccess; - QWORD ibSlab = 0; + ERR err = JET_errSuccess; + QWORD ibSlab = 0; + BOOL fEvictOrInvalidate = fFalse; // ignore chunks that are in an error state @@ -831,9 +832,17 @@ class THashedLRUKCache Call( pcbs->ErrGetPhysicalId( &ibSlab ) ); - if ( m_pc->FHashSlab( ibSlab ) && - slotstCurrent.FSlotUpdated() && - slotstAccepted.FValid() && !slotstCurrent.FValid() ) + fEvictOrInvalidate = ( m_pc->FHashSlab( ibSlab ) && + slotstCurrent.FSlotUpdated() && + slotstAccepted.FValid() && + ( !slotstCurrent.FValid() || + !( slotstAccepted.Cbid().Volumeid() == slotstCurrent.Cbid().Volumeid() && + slotstAccepted.Cbid().Fileid() == slotstCurrent.Cbid().Fileid() && + slotstAccepted.Cbid().Fileserial() == slotstCurrent.Cbid().Fileserial() && + slotstAccepted.Cbid().Cbno() == slotstCurrent.Cbid().Cbno() && + slotstAccepted.Updno() == slotstCurrent.Updno() ) ) ); + + if ( fEvictOrInvalidate ) { if ( slotstAccepted.Clno() == slotstCurrent.Clno() ) { @@ -865,7 +874,27 @@ class THashedLRUKCache } } - // accumulate this slot for the journal entry + // if this slot was evicted or invalidated then accumulate the before image of the slot for the + // journal entry. this will allow us to see the evict/invalidate in the journal and won't cost + // much. 
this before image will be applied to the slab during recovery but it will be immediately + // overwritten with the after image + + if ( fEvictOrInvalidate ) + { + if ( !slotstCurrent.FValid() ) + { + // don't emit the same slot twice if the evicted/invalidated slot is not immediately reused + } + else + { + // NOTE: only evicted slots can be immediately reused so these are all evicts + + Call( ErrToErr>( m_arrayCachedBlockUpdate.ErrSetEntry( m_arrayCachedBlockUpdate.Size(), + CCachedBlockUpdate( slotstAccepted ) ) ) ); + } + } + + // accumulate the updated slot for the journal entry Call( ErrToErr>( m_arrayCachedBlockUpdate.ErrSetEntry( m_arrayCachedBlockUpdate.Size(), CCachedBlockUpdate( slotstCurrent ) ) ) ); @@ -1077,12 +1106,20 @@ class THashedLRUKCache { public: - static ERR ErrExecute( _In_ THashedLRUKCache* const pc, - _Inout_ ICachedBlockSlab** const ppcbs, - _In_ CHashedLRUKCachedFileTableEntry* const pcfte, - _In_ const COffsets offsets ) + static ERR ErrExecute( _In_ THashedLRUKCache* const pc, + _In_ ICachedBlockSlab* const pcbsHash, + _In_ CHashedLRUKCachedFileTableEntry* const pcfte, + _In_ const COffsets offsets, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) { - return CInvalidateSlabVisitor( pc, ppcbs, pcfte, offsets ).ErrInvalidateSlots(); + ERR err = JET_errSuccess; + CInvalidateSlabVisitor isv( pc, pcbsHash, pcfte, offsets, ppcbsJournal ); + + Call( isv.ErrInvalidateSlots() ); + + HandleError: + isv.CaptureJournalSlab( ppcbsJournal ); + return err; } ~CInvalidateSlabVisitor() @@ -1092,17 +1129,19 @@ class THashedLRUKCache protected: - CInvalidateSlabVisitor( _In_ THashedLRUKCache* const pc, - _Inout_ ICachedBlockSlab** const ppcbs, - _In_ CHashedLRUKCachedFileTableEntry* const pcfte, - _In_ const COffsets offsets ) - : CCachedBlockSlabVisitor( *ppcbs ), + CInvalidateSlabVisitor( _In_ THashedLRUKCache* const pc, + _In_ ICachedBlockSlab* const pcbsHash, + _In_ CHashedLRUKCachedFileTableEntry* const pcfte, + _In_ const COffsets offsets, + 
_Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) + : CCachedBlockSlabVisitor( pcbsHash ), m_pc( pc ), - m_pcbs( *ppcbs ), + m_pcbsHash( pcbsHash ), m_pcfte( pcfte ), m_offsets( offsets ), - m_pcbsJournal( NULL ) + m_pcbsJournal( *ppcbsJournal ) { + *ppcbsJournal = NULL; } ERR ErrVisitSlots_( _In_ ICachedBlockSlab* const pcbs, @@ -1129,13 +1168,16 @@ class THashedLRUKCache Call( ErrVisitSlots() ); - // update the affected slabs - HandleError: - err = ErrAccumulateError( err, ErrUpdateSlabs() ); return err; } + void CaptureJournalSlab( _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) + { + *ppcbsJournal = m_pcbsJournal; + m_pcbsJournal = NULL; + } + ERR ErrInvalidateSlot( _In_ ICachedBlockSlab* const pcbs, _In_ const CCachedBlockSlotState& slotstCurrent, _In_ const CCachedBlockSlot& slotNew ) @@ -1175,29 +1217,10 @@ class THashedLRUKCache return err; } - ERR ErrUpdateSlabs() - { - ERR err = JET_errSuccess; - - // if we moved clusters due to invalidating cached data then we must perform our changes in one - // atomic update - - if ( m_pcbsJournal && m_pcbsJournal->FUpdated() ) - { - Call( m_pc->ErrUpdateSlabs( &m_pcbs, &m_pcbsJournal ) ); - } - - // release our journal slab - - HandleError: - m_pc->ReleaseSlab( err, &m_pcbsJournal ); - return err; - } - private: THashedLRUKCache* const m_pc; - ICachedBlockSlab*& m_pcbs; + ICachedBlockSlab* m_pcbsHash; CHashedLRUKCachedFileTableEntry* const m_pcfte; const COffsets m_offsets; ICachedBlockSlab* m_pcbsJournal; @@ -1946,17 +1969,18 @@ class THashedLRUKCache { public: - static ERR ErrExecute( _In_ THashedLRUKCache* const pc, - _In_ CRequest* const prequest, - _Inout_ ICachedBlockSlab** const ppcbs, - _In_ const BOOL fRead, - _In_ const QWORD cbRequested, - _In_ const BOOL fOverrideCachePercentage, - _Out_ QWORD* const pcbProduced, - _Out_ QWORD* const pcbWriteBackFailed ) + static ERR ErrExecute( _In_ THashedLRUKCache* const pc, + _In_ CRequest* const prequest, + _In_ ICachedBlockSlab* const pcbsHash, + _In_ const 
BOOL fRead, + _In_ const QWORD cbRequested, + _In_ const BOOL fOverrideCachePercentage, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal, + _Out_ QWORD* const pcbProduced, + _Out_ QWORD* const pcbWriteBackFailed ) { - ERR err = JET_errSuccess; - CInspectSlabVisitor isv( *ppcbs ); + ERR err = JET_errSuccess; + CInspectSlabVisitor isv( pcbsHash ); *pcbProduced = 0; *pcbWriteBackFailed = 0; @@ -1972,14 +1996,20 @@ class THashedLRUKCache { CCleanSlabVisitor csv( pc, prequest, - ppcbs, + pcbsHash, fRead, cbRequested, fOverrideCachePercentage, + ppcbsJournal, isv.CbTotal(), isv.CbWriteCache(), isv.CbReadCache() ); - Call( csv.ErrTryCleanSlots() ); + + err = csv.ErrTryCleanSlots(); + + csv.CaptureJournalSlab( ppcbsJournal ); + + Call( err ); *pcbProduced = csv.CbProduced(); *pcbWriteBackFailed = csv.CbWriteBackFailed(); @@ -2003,19 +2033,20 @@ class THashedLRUKCache protected: - CCleanSlabVisitor( _In_ THashedLRUKCache* const pc, - _In_ CRequest* const prequest, - _Inout_ ICachedBlockSlab** const ppcbs, - _In_ const BOOL fRead, - _In_ const QWORD cbRequested, - _In_ const BOOL fOverrideCachePercentage, - _In_ const QWORD cbTotal, - _In_ const QWORD cbWriteCache, - _In_ const QWORD cbReadCache ) - : CCachedBlockSlabVisitor( *ppcbs ), + CCleanSlabVisitor( _In_ THashedLRUKCache* const pc, + _In_ CRequest* const prequest, + _In_ ICachedBlockSlab* const pcbsHash, + _In_ const BOOL fRead, + _In_ const QWORD cbRequested, + _In_ const BOOL fOverrideCachePercentage, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal, + _In_ const QWORD cbTotal, + _In_ const QWORD cbWriteCache, + _In_ const QWORD cbReadCache ) + : CCachedBlockSlabVisitor( pcbsHash ), m_pc( pc ), m_prequest( prequest ), - m_pcbs( *ppcbs ), + m_pcbsHash( pcbsHash ), m_fRead( fRead ), m_cbRequested( cbRequested ), m_fOverrideCachePercentage( fOverrideCachePercentage ), @@ -2035,8 +2066,9 @@ class THashedLRUKCache m_cbWriteBackFailed( 0 ), m_cbEvicted( 0 ), m_cbInvalidatePending( 0 ), - m_pcbsJournal( NULL ) + 
m_pcbsJournal( *ppcbsJournal ) { + *ppcbsJournal = NULL; } ERR ErrVisitSlots_( _In_ ICachedBlockSlab* const pcbs, @@ -2071,10 +2103,7 @@ class THashedLRUKCache Call( ErrPerformInvalidation() ); - // update the affected slabs - HandleError: - err = ErrAccumulateError( err, ErrUpdateSlabs() ); OSTrace( JET_tracetagBlockCacheOperations, OSFormat( "C=%s R=0x%016I64x Clean cbRequested=%llu cbProduced=%llu (cbInvalid=%llu cbWriteBack=%llu cbEvicted=%llu cbInvalidated=%llu) err=%d", OSFormatFileId( m_pc ), @@ -2089,6 +2118,12 @@ class THashedLRUKCache return err; } + void CaptureJournalSlab( _Inout_ ICachedBlockSlab** const ppcbsJournal ) + { + *ppcbsJournal = m_pcbsJournal; + m_pcbsJournal = NULL; + } + QWORD CbProduced() const { return m_cbInvalid + @@ -2305,7 +2340,7 @@ class THashedLRUKCache // register the write back to be performed later - Call( CWriteBack::ErrRegister( m_pc, m_prequest, m_pcbs, m_ilWriteBack, slotstCurrent, pcfte, offsets ) ); + Call( CWriteBack::ErrRegister( m_pc, m_prequest, m_pcbsHash, m_ilWriteBack, slotstCurrent, pcfte, offsets ) ); HandleError: return err; @@ -2426,11 +2461,11 @@ class THashedLRUKCache // mark all the slots we just successfully wrote back as clean - Call( CCleanSlotsSlabVisitor::ErrExecute( m_pcbs, &m_ilWriteBack ) ); + Call( CCleanSlotsSlabVisitor::ErrExecute( m_pcbsHash, &m_ilWriteBack ) ); // evict all the slots that we just marked as clean - Call( CEvictCleanedSlotsSlabVisitor::ErrExecute( m_pc, m_pcbs, &m_ilWriteBack, &m_pcbsJournal ) ); + Call( CEvictCleanedSlotsSlabVisitor::ErrExecute( m_pc, m_pcbsHash, &m_ilWriteBack, &m_pcbsJournal ) ); // count how many write backs failed @@ -2459,30 +2494,9 @@ class THashedLRUKCache // invalidate all the slots that contain data for deleted cached files - Call( CInvalidateAbandonedSlotsSlabVisitor::ErrExecute( m_pc, m_pcbs, &m_pcbsJournal ) ); - - HandleError: - return err; - } - - ERR ErrUpdateSlabs() - { - ERR err = JET_errSuccess; - - // if we moved clusters due to evicting 
cached data then we must perform our changes in one atomic - // update - - if ( m_pcbsJournal && m_pcbsJournal->FUpdated() ) - { - // update both slabs atomically - - Call( m_pc->ErrUpdateSlabs( &m_pcbs, &m_pcbsJournal ) ); - } - - // release our journal slab + Call( CInvalidateAbandonedSlotsSlabVisitor::ErrExecute( m_pc, m_pcbsHash, &m_pcbsJournal ) ); HandleError: - m_pc->ReleaseSlab( err, &m_pcbsJournal ); return err; } @@ -2961,7 +2975,7 @@ class THashedLRUKCache THashedLRUKCache* const m_pc; CRequest* const m_prequest; - ICachedBlockSlab*& m_pcbs; + ICachedBlockSlab* const m_pcbsHash; const BOOL m_fRead; const QWORD m_cbRequested; const BOOL m_fOverrideCachePercentage; @@ -4088,7 +4102,13 @@ class THashedLRUKCache // // we track unmodified blocks due to a limitation with add below - if ( slotstAccepted.FValid() && !slotstCurrent.FValid() ) + if ( slotstAccepted.FValid() && + ( !slotstCurrent.FValid() || + !( slotstAccepted.Cbid().Volumeid() == slotstCurrent.Cbid().Volumeid() && + slotstAccepted.Cbid().Fileid() == slotstCurrent.Cbid().Fileid() && + slotstAccepted.Cbid().Fileserial() == slotstCurrent.Cbid().Fileserial() && + slotstAccepted.Cbid().Cbno() == slotstCurrent.Cbid().Cbno() && + slotstAccepted.Updno() == slotstCurrent.Updno() ) ) ) { if ( slotstAccepted.Cbid().Volumeid() != volumeidInvalid && slotstAccepted.Cbid().Fileid() != fileidInvalid && @@ -4111,7 +4131,13 @@ class THashedLRUKCache // not previously cached just by looking at this one slot. 
ideally we would detect this and track // only one - if ( !slotstAccepted.FValid() && slotstCurrent.FValid() ) + if ( ( !slotstAccepted.FValid() || + !( slotstAccepted.Cbid().Volumeid() == slotstCurrent.Cbid().Volumeid() && + slotstAccepted.Cbid().Fileid() == slotstCurrent.Cbid().Fileid() && + slotstAccepted.Cbid().Fileserial() == slotstCurrent.Cbid().Fileserial() && + slotstAccepted.Cbid().Cbno() == slotstCurrent.Cbid().Cbno() && + slotstAccepted.Updno() == slotstCurrent.Updno() ) ) && + slotstCurrent.FValid() ) { if ( slotstCurrent.Cbid().Volumeid() != volumeidInvalid && slotstCurrent.Cbid().Fileid() != fileidInvalid && @@ -4925,12 +4951,21 @@ class THashedLRUKCache ERR ErrRedoJournalEntries(); ERR ErrRedoJournalEntry( _In_ const CQueuedJournalEntry* const pqje ); ERR ErrRedoCacheUpdateJournalEntry( _In_ const CQueuedJournalEntry* const pqje ); - void LogCacheUpdateJournalEntry( _In_ const CQueuedJournalEntry* const pqje, - _In_ const QWORD ibSlab ); + void TraceCacheUpdateJournalEntry( _In_ const CQueuedJournalEntry* const pqje, + _In_ const QWORD ibSlab ); + void TraceCacheUpdateJournalEntry( _In_ const CCacheUpdateJournalEntry* const pcuje, + _In_ const JournalPosition jpos, + _In_ const QWORD ibSlab = 0, + _In_ const BOOL fRedo = fFalse ); + void TraceCacheUpdateJournalEntryInternal( _In_ const CCacheUpdateJournalEntry* const pcuje, + _In_ const JournalPosition jpos, + _In_ const QWORD ibSlab, + _In_ const BOOL fRedo ); void ReleaseJournalEntries(); ERR ErrChangeSlabs( _In_ const QWORD ibSlab, - _Inout_ ICachedBlockSlab** const ppcbs, + _Inout_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal = NULL, _Out_opt_ BOOL* const pfChangedSlab = NULL ); ERR ErrUpdateSlab( _Inout_opt_ ICachedBlockSlab** const ppcbs ) @@ -5027,29 +5062,34 @@ class THashedLRUKCache void RequestIO( _In_ CRequest* const prequestIO, _In_ const BOOL fCachedFile, _In_ const BOOL fCachingFile ); - void RequestRead( _In_ CRequest* const prequest, - _In_ 
const BOOL fCachedFile, - _In_ const BOOL fCachingFile, - _Inout_ ICachedBlockSlab** const ppcbs ); - void RequestFinalizeRead( _In_ CRequest* const prequestIO, - _Inout_ ICachedBlockSlab** const ppcbs ); + void RequestRead( _In_ CRequest* const prequest, + _In_ const BOOL fCachedFile, + _In_ const BOOL fCachingFile, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ); + void RequestFinalizeRead( _In_ CRequest* const prequestIO, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ); ERR ErrUnexpectedDataReadFailure( _In_ CHashedLRUKCachedFileTableEntry* const pcfte, _In_ const COffsets offsets, _In_ const char* const szFunction, _In_ const ERR errFromCall, _In_ const ERR errToReturn ); - void RequestWrite( _In_ CRequest* const prequest, - _In_ const BOOL fCachedFile, - _In_ const BOOL fCachingFile, - _Inout_ ICachedBlockSlab** const ppcbs ); - void RequestFinalizeWrite( _In_ CRequest* const prequestIO, - _Inout_ ICachedBlockSlab** const ppcbs ); - - ERR ErrCleanSlab( _In_ CRequest* const prequest, - _Inout_ ICachedBlockSlab** const ppcbs, - _In_ const BOOL fRead, - _In_ const QWORD ib, - _Inout_ QWORD* const pcbClean ); + void RequestWrite( _In_ CRequest* const prequest, + _In_ const BOOL fCachedFile, + _In_ const BOOL fCachingFile, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ); + void RequestFinalizeWrite( _In_ CRequest* const prequestIO, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ); + + ERR ErrCleanSlab( _In_ CRequest* const prequest, + _In_ ICachedBlockSlab* const pcbsHash, + _In_ const BOOL fRead, + _In_ const QWORD ib, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal, + _Inout_ QWORD* const pcbClean ); ERR ErrIsPossiblyCached( _In_ CHashedLRUKCachedFileTableEntry* pcfte, _In_ const QWORD ibCachedBlock, @@ -5958,7 +5998,8 @@ ERR 
THashedLRUKCache::ErrInvalidate( _In_ const VolumeId volumeid, BYTE* rgbSlabBitmap = NULL; IBitmapAPI* pbmSlab = NULL; QWORD cSlabInvalidated = 0; - ICachedBlockSlab* pcbs = NULL; + ICachedBlockSlab* pcbsHash = NULL; + ICachedBlockSlab* pcbsJournal = NULL; // get the cached file @@ -6057,7 +6098,7 @@ ERR THashedLRUKCache::ErrInvalidate( _In_ const VolumeId volumeid, // change to the slab containing this block - Call( ErrChangeSlabs( ibSlab, &pcbs ) ); + Call( ErrChangeSlabs( ibSlab, &pcbsHash, &pcbsJournal ) ); // determine if we have invalidated this slab previously @@ -6080,15 +6121,20 @@ ERR THashedLRUKCache::ErrInvalidate( _In_ const VolumeId volumeid, // invalidate all matching cached blocks from this slab - Call( CInvalidateSlabVisitor::ErrExecute( this, &pcbs, pcfte, offsets ) ); + Call( CInvalidateSlabVisitor::ErrExecute( this, pcbsHash, pcfte, offsets, &pcbsJournal ) ); // note that we have invalidated this slab cSlabInvalidated++; } + // perform any remaining slab update + + Call( ErrUpdateSlabs( &pcbsHash, &pcbsJournal ) ); + HandleError: - ReleaseSlab( err, &pcbs ); + ReleaseSlab( err, &pcbsHash ); + ReleaseSlab( err, &pcbsJournal ); if ( piorl ) { piorl->Release(); @@ -6462,8 +6508,12 @@ ERR THashedLRUKCache::ErrDumpJournalEntry( _In_ const CQueuedJournalEntry* co for ( ULONG icbu = 0; icbu < pcuje->Ccbu(); icbu++ ) { - const CCachedBlockUpdate* pcbu = pcuje->Pcbu( icbu ); - ERR errChunk = JET_errSuccess; + const CCachedBlockUpdate* const pcbu = pcuje->Pcbu( icbu ); + const CCachedBlockUpdate* const pcbuNext = pcuje->Pcbu( icbu + 1 ); + const BOOL fEvictBeforeImage = ( pcbuNext && + pcbu->Chno() == pcbuNext->Chno() && + pcbu->Slno() == pcbuNext->Slno() ); + ERR errChunk = JET_errSuccess; if ( ibSlab != pcbu->IbSlab() ) { @@ -6478,7 +6528,7 @@ ERR THashedLRUKCache::ErrDumpJournalEntry( _In_ const CQueuedJournalEntry* co Call( CChunkStatus::ErrExecute( pcbs, pcbu->Chno(), &errChunk ) ); } - if ( !pcbu->FSlotUpdated() ) + if ( !( pcbu->FSlotUpdated() || 
fEvictBeforeImage ) ) { if ( errChunk >= JET_errSuccess ) { @@ -6487,9 +6537,10 @@ ERR THashedLRUKCache::ErrDumpJournalEntry( _In_ const CQueuedJournalEntry* co } (*pcprintf)( "\n" ); - (*pcprintf)( " %c%c ", + (*pcprintf)( " %c%c%c ", pcbu->FSlotUpdated() ? 'U' : '_', - pcbu->FClusterReference() ? 'R' : '_' ); + pcbu->FClusterReference() ? 'R' : '_', + fEvictBeforeImage ? 'E' : '_' ); CCachedBlockSlot::Dump( *pcbu, pcprintf, Pfident() ); } } @@ -7177,7 +7228,7 @@ ERR THashedLRUKCache::ErrRedoCacheUpdateJournalEntry( _In_ const CQueuedJourn Call( ErrScheduleSlabForWriteBack( pcbs, jpos, jposEnd ) ); - LogCacheUpdateJournalEntry( pqje, ibSlab ); + TraceCacheUpdateJournalEntry( pqje, ibSlab ); // release the slab @@ -7200,7 +7251,7 @@ ERR THashedLRUKCache::ErrRedoCacheUpdateJournalEntry( _In_ const CQueuedJourn Call( ErrScheduleSlabForWriteBack( pcbs, jpos, jposEnd ) ); - LogCacheUpdateJournalEntry( pqje, ibSlab ); + TraceCacheUpdateJournalEntry( pqje, ibSlab ); // release the slab @@ -7210,27 +7261,50 @@ HandleError: } template -void THashedLRUKCache::LogCacheUpdateJournalEntry( _In_ const CQueuedJournalEntry* const pqje, +void THashedLRUKCache::TraceCacheUpdateJournalEntry( _In_ const CQueuedJournalEntry* const pqje, _In_ const QWORD ibSlab ) { - const CCacheUpdateJournalEntry* const pcuje = (const CCacheUpdateJournalEntry*)pqje->Pje(); - const JournalPosition jpos = pqje->Jpos(); + TraceCacheUpdateJournalEntry( (const CCacheUpdateJournalEntry*)pqje->Pje(), pqje->Jpos(), ibSlab, fTrue ); +} +template +void THashedLRUKCache::TraceCacheUpdateJournalEntry( _In_ const CCacheUpdateJournalEntry* const pcuje, + _In_ const JournalPosition jpos, + _In_ const QWORD ibSlab, + _In_ const BOOL fRedo ) +{ if ( FOSTraceTagEnabled( JET_tracetagBlockCacheOperations ) ) { - for ( ULONG icbu = 0; icbu < pcuje->Ccbu(); icbu++ ) + TraceCacheUpdateJournalEntryInternal( pcuje, jpos, ibSlab, fRedo ); + } +} + +template +void THashedLRUKCache::TraceCacheUpdateJournalEntryInternal( _In_ 
const CCacheUpdateJournalEntry* const pcuje, + _In_ const JournalPosition jpos, + _In_ const QWORD ibSlab, + _In_ const BOOL fRedo ) +{ + for ( ULONG icbu = 0; icbu < pcuje->Ccbu(); icbu++ ) + { + const CCachedBlockUpdate* const pcbu = pcuje->Pcbu( icbu ); + const CCachedBlockUpdate* const pcbuNext = pcuje->Pcbu( icbu + 1 ); + + if ( ibSlab == 0 || pcbu->IbSlab() == ibSlab ) { - const CCachedBlockUpdate* const pcbu = pcuje->Pcbu( icbu ); + const BOOL fEvictBeforeImage = pcbuNext && pcbu->Chno() == pcbuNext->Chno() && pcbu->Slno() == pcbuNext->Slno(); - if ( pcbu->IbSlab() == ibSlab && pcbu->FSlotUpdated() ) + if ( pcbu->FSlotUpdated() || fEvictBeforeImage ) { OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s 0x%016I64x CacheUpdate %c%c %s (REDO)", + OSFormat( "C=%s 0x%016I64x CacheUpdate %c%c%c %s%s", OSFormatFileId( this ), QWORD( jpos ), pcbu->FSlotUpdated() ? 'U' : '_', pcbu->FClusterReference() ? 'R' : '_', - OSFormat( *pcbu ) ) ); + fEvictBeforeImage ? 'E' : '_', + OSFormat( *pcbu ), + fRedo ? " (REDO)" : "" ) ); } } } @@ -7248,15 +7322,21 @@ void THashedLRUKCache::ReleaseJournalEntries() template ERR THashedLRUKCache::ErrChangeSlabs( _In_ const QWORD ibSlab, - _Inout_ ICachedBlockSlab** const ppcbs, + _Inout_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal, _Out_opt_ BOOL* const pfChangedSlab ) { ERR err = JET_errSuccess; - ICachedBlockSlab* pcbs = *ppcbs; + ICachedBlockSlab* pcbsHash = *ppcbsHash; + ICachedBlockSlab* pcbsJournal = ppcbsJournal ? 
*ppcbsJournal : NULL; QWORD ibSlabCurrent = 0; BOOL fChangedSlab = fFalse; - *ppcbs = NULL; + *ppcbsHash = NULL; + if ( ppcbsJournal ) + { + *ppcbsJournal = NULL; + } if ( pfChangedSlab ) { *pfChangedSlab = fFalse; @@ -7264,46 +7344,57 @@ ERR THashedLRUKCache::ErrChangeSlabs( _In_ const QWORD // we already have a slab open - if ( pcbs ) + if ( pcbsHash ) { // if this slab cannot contain this cached block then we need to move to the correct one - Call( pcbs->ErrGetPhysicalId( &ibSlabCurrent ) ); + Call( pcbsHash->ErrGetPhysicalId( &ibSlabCurrent ) ); if ( ibSlab != ibSlabCurrent ) { - // update this slab + // update this slab and any associated journal slab - Call( ErrUpdateSlab( &pcbs ) ); + Call( ErrUpdateSlabs( &pcbsHash, &pcbsJournal ) ); - // release this slab + // release this slab and any associated journal slab - ReleaseSlab( JET_errSuccess, &pcbs ); + ReleaseSlab( JET_errSuccess, &pcbsHash ); + ReleaseSlab( JET_errSuccess, &pcbsJournal ); } } // if we don't have a slab open then open the one that can hold this cached block - if ( !pcbs ) + if ( !pcbsHash ) { - Call( ErrGetSlab( ibSlab, &pcbs ) ); + Call( ErrGetSlab( ibSlab, &pcbsHash ) ); fChangedSlab = fTrue; } // return the slab that can hold this cached block - *ppcbs = pcbs; - pcbs = NULL; + *ppcbsHash = pcbsHash; + pcbsHash = NULL; + if ( ppcbsJournal ) + { + *ppcbsJournal = pcbsJournal; + pcbsJournal = NULL; + } if ( pfChangedSlab ) { *pfChangedSlab = fChangedSlab; } HandleError: - ReleaseSlab( err, &pcbs ); + ReleaseSlab( err, &pcbsHash ); + ReleaseSlab( err, &pcbsJournal ); if ( err < JET_errSuccess ) { - ReleaseSlab( err, ppcbs ); + ReleaseSlab( err, ppcbsHash ); + if ( ppcbsJournal ) + { + ReleaseSlab( err, ppcbsJournal ); + } if ( pfChangedSlab ) { *pfChangedSlab = fFalse; @@ -7380,23 +7471,7 @@ ERR THashedLRUKCache::ErrUpdateSlabs( _Inout_opt_ ICachedBlockSlab** const Call( m_pj->ErrAppendEntry( _countof( rgjb ), rgjb, &jpos, &jposEnd ) ); } - for ( ULONG icbu = 0; icbu < pcuje->Ccbu(); icbu++ ) 
- { - const CCachedBlockUpdate* pcbu = pcuje->Pcbu( icbu ); - - if ( !pcbu->FSlotUpdated() ) - { - continue; - } - - OSTrace( JET_tracetagBlockCacheOperations, - OSFormat( "C=%s 0x%016I64x CacheUpdate %c%c %s", - OSFormatFileId( this ), - QWORD( jpos ), - pcbu->FSlotUpdated() ? 'U' : '_', - pcbu->FClusterReference() ? 'R' : '_', - OSFormat( *pcbu ) ) ); - } + TraceCacheUpdateJournalEntry( pcuje, jpos ); // schedule slabs for write back @@ -8880,7 +8955,8 @@ void THashedLRUKCache::RequestIO( _In_ CRequest* const prequestIO ) template void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) { - ICachedBlockSlab* pcbs = NULL; + ICachedBlockSlab* pcbsHash = NULL; + ICachedBlockSlab* pcbsJournal = NULL; // loop through every request in this IO @@ -8899,11 +8975,11 @@ void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) if ( prequestIO->FRead() ) { - RequestFinalizeRead( prequest, &pcbs ); + RequestFinalizeRead( prequest, &pcbsHash, &pcbsJournal ); } else { - RequestFinalizeWrite( prequest, &pcbs ); + RequestFinalizeWrite( prequest, &pcbsHash, &pcbsJournal ); } } } @@ -8915,7 +8991,7 @@ void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) // failure to update this particular slab. 
this is OK because this means the journal and thus the entire cache // are about to go down - const ERR err = ErrUpdateSlab( &pcbs ); + const ERR err = ErrUpdateSlabs( &pcbsHash, &pcbsJournal ); if ( err < JET_errSuccess ) { if ( !prequestIO->FRead() ) @@ -8926,7 +9002,8 @@ void THashedLRUKCache::RequestFinalizeIO( _In_ CRequest* const prequestIO ) // release the current slab - ReleaseSlab( err, &pcbs ); + ReleaseSlab( err, &pcbsHash ); + ReleaseSlab( err, &pcbsJournal ); } template @@ -8934,7 +9011,8 @@ void THashedLRUKCache::RequestIO( _In_ CRequest* const prequestIO, _In_ const BOOL fCachedFile, _In_ const BOOL fCachingFile ) { - ICachedBlockSlab* pcbs = NULL; + ICachedBlockSlab* pcbsHash = NULL; + ICachedBlockSlab* pcbsJournal = NULL; // loop through every request in this IO @@ -8948,11 +9026,11 @@ void THashedLRUKCache::RequestIO( _In_ CRequest* const prequestIO, if ( prequest->FRead() ) { - RequestRead( prequest, fCachedFile, fCachingFile, &pcbs ); + RequestRead( prequest, fCachedFile, fCachingFile, &pcbsHash, &pcbsJournal ); } else { - RequestWrite( prequest, fCachedFile, fCachingFile, &pcbs ); + RequestWrite( prequest, fCachedFile, fCachingFile, &pcbsHash, &pcbsJournal ); } } @@ -8962,7 +9040,7 @@ void THashedLRUKCache::RequestIO( _In_ CRequest* const prequestIO, // failure to update this particular slab. 
this is OK because this means the journal and thus the entire cache // are about to go down - const ERR err = ErrUpdateSlab( &pcbs ); + const ERR err = ErrUpdateSlabs( &pcbsHash, &pcbsJournal ); if ( err < JET_errSuccess ) { FailIO( prequestIO, err ); @@ -8970,17 +9048,20 @@ void THashedLRUKCache::RequestIO( _In_ CRequest* const prequestIO, // release the current slab - ReleaseSlab( err, &pcbs ); + ReleaseSlab( err, &pcbsHash ); + ReleaseSlab( err, &pcbsJournal ); } template -void THashedLRUKCache::RequestRead( _In_ CRequest* const prequest, - _In_ const BOOL fCachedFile, - _In_ const BOOL fCachingFile, - _Inout_ ICachedBlockSlab** const ppcbs ) +void THashedLRUKCache::RequestRead( _In_ CRequest* const prequest, + _In_ const BOOL fCachedFile, + _In_ const BOOL fCachingFile, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) { ERR err = JET_errSuccess; - ICachedBlockSlab*& pcbs = *ppcbs; + ICachedBlockSlab*& pcbsHash = *ppcbsHash; + ICachedBlockSlab*& pcbsJournal = *ppcbsJournal; QWORD ibCachedFileDeferred = 0; size_t cbCachedFileDeferred = 0; @@ -9016,11 +9097,11 @@ void THashedLRUKCache::RequestRead( _In_ CRequest* const preq // NOTE: this may experience sync reads from the caching file // NOTE: this may wait for another request to finish accessing the slab - Call( ErrChangeSlabs( ibSlab, &pcbs ) ); + Call( ErrChangeSlabs( ibSlab, &pcbsHash, &pcbsJournal ) ); // determine if the block is already cached - Call( pcbs->ErrGetSlotForRead( cbid, &slot ) ); + Call( pcbsHash->ErrGetSlotForRead( cbid, &slot ) ); fCached = slot.FValid(); } @@ -9048,7 +9129,7 @@ void THashedLRUKCache::RequestRead( _In_ CRequest* const preq { // read the cluster into the output buffer - Call( prequest->ErrReadCluster( pcbs, slot, cbCachedBlock, pbCachedBlock ) ); + Call( prequest->ErrReadCluster( pcbsHash, slot, cbCachedBlock, pbCachedBlock ) ); // we verify the cluster contents in RequestFinalizeRead @@ -9103,11 +9184,13 @@ HandleError: } 
template -void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const prequest, - _Inout_ ICachedBlockSlab** const ppcbs ) +void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const prequest, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) { ERR err = JET_errSuccess; - ICachedBlockSlab*& pcbs = *ppcbs; + ICachedBlockSlab*& pcbsHash = *ppcbsHash; + ICachedBlockSlab*& pcbsJournal = *ppcbsJournal; BOOL fChangedSlab = fFalse; QWORD cbClean = 0; @@ -9146,13 +9229,13 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const { // get the slab for this cluster - Call( ErrChangeSlabs( ibSlab, &pcbs, &fChangedSlab ) ); + Call( ErrChangeSlabs( ibSlab, &pcbsHash, &pcbsJournal, &fChangedSlab ) ); cbClean = fChangedSlab ? 0 : cbClean; // determine if the block is already cached - Call( pcbs->ErrGetSlotForRead( cbid, &slot ) ); + Call( pcbsHash->ErrGetSlotForRead( cbid, &slot ) ); fCached = slot.FValid(); } @@ -9162,7 +9245,7 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const { // verify the data we read - err = pcbs->ErrVerifyCluster( slot, cbCachedBlock, pbCachedBlock ); + err = pcbsHash->ErrVerifyCluster( slot, cbCachedBlock, pbCachedBlock ); if ( err < JET_errSuccess ) { Error( ErrUnexpectedDataReadFailure( prequest->Pcfte(), @@ -9178,7 +9261,7 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const if ( fCacheIfPossible ) { - Call( pcbs->ErrUpdateSlot( slot ) ); + Call( pcbsHash->ErrUpdateSlot( slot ) ); } } @@ -9190,21 +9273,21 @@ void THashedLRUKCache::RequestFinalizeRead( _In_ CRequest* const // // NOTE: this can wait on cached file IO if the async clean process has fallen behind - Call( ErrCleanSlab( prequest, &pcbs, fTrue, ibCachedBlock, &cbClean ) ); + Call( ErrCleanSlab( prequest, pcbsHash, fTrue, ibCachedBlock, &pcbsJournal, &cbClean ) ); // try to get a slot to cache this cluster - Call( pcbs->ErrGetSlotForCache( cbid, cbCachedBlock, pbCachedBlock, &slot ) ); + 
Call( pcbsHash->ErrGetSlotForCache( cbid, cbCachedBlock, pbCachedBlock, &slot ) ); if ( slot.FValid() ) { // update the slot corresponding to this cluster - Call( pcbs->ErrUpdateSlot( slot ) ); + Call( pcbsHash->ErrUpdateSlot( slot ) ); cbClean -= cbCachedBlock; // write the data we are caching to the cluster - Call( prequest->ErrWriteCluster( pcbs, slot, cbCachedBlock, pbCachedBlock ) ); + Call( prequest->ErrWriteCluster( pcbsHash, slot, cbCachedBlock, pbCachedBlock ) ); } } } @@ -9251,13 +9334,15 @@ ERR THashedLRUKCache::ErrUnexpectedDataReadFailure( _In_ CHashedLRUKCachedFi } template -void THashedLRUKCache::RequestWrite( _In_ CRequest* const prequest, - _In_ const BOOL fCachedFile, - _In_ const BOOL fCachingFile, - _Inout_ ICachedBlockSlab** const ppcbs ) +void THashedLRUKCache::RequestWrite( _In_ CRequest* const prequest, + _In_ const BOOL fCachedFile, + _In_ const BOOL fCachingFile, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) { ERR err = JET_errSuccess; - ICachedBlockSlab*& pcbs = *ppcbs; + ICachedBlockSlab*& pcbsHash = *ppcbsHash; + ICachedBlockSlab*& pcbsJournal = *ppcbsJournal; BOOL fChangedSlab = fFalse; QWORD cbClean = 0; QWORD ibCachedFileDeferred = 0; @@ -9307,7 +9392,7 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // NOTE: this may experience sync reads from the caching file // NOTE: this may wait for another request to finish accessing the slab - Call( ErrChangeSlabs( ibSlab, &pcbs, &fChangedSlab ) ); + Call( ErrChangeSlabs( ibSlab, &pcbsHash, &pcbsJournal, &fChangedSlab ) ); if ( fChangedSlab && ibCachedBlock % min( m_pch->CbCachedFilePerSlab(), prequest->Pcfte()->CbBlockSize() ) ) { @@ -9318,7 +9403,7 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // determine if the block is already cached - Call( pcbs->ErrGetSlotForRead( cbid, &slot ) ); + Call( pcbsHash->ErrGetSlotForRead( cbid, &slot ) ); fCached = slot.FValid(); } @@ -9355,12 +9440,12 @@ void 
THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // // NOTE: this can wait on cached file IO if the async clean process has fallen behind - const BOOL fUpdatedBeforeClean = pcbs->FUpdated(); + const BOOL fUpdatedBeforeClean = pcbsHash->FUpdated(); - Call( ErrCleanSlab( prequest, &pcbs, fFalse, ibCachedBlock, &cbClean ) ); + Call( ErrCleanSlab( prequest, pcbsHash, fFalse, ibCachedBlock, &pcbsJournal, &cbClean ) ); if ( fUpdatedBeforeClean && - !pcbs->FUpdated() && + !pcbsHash->FUpdated() && ibCachedBlock % min( m_pch->CbCachedFilePerSlab(), prequest->Pcfte()->CbBlockSize() ) != 0 ) { BlockCacheNotableEvent( "TornWriteOpportunity2" ); @@ -9368,7 +9453,7 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // try to get a slot to write this cluster - Call( pcbs->ErrGetSlotForWrite( cbid, cbCachedBlock, pbCachedBlock, &slot ) ); + Call( pcbsHash->ErrGetSlotForWrite( cbid, cbCachedBlock, pbCachedBlock, &slot ) ); if ( !slot.FValid() ) { Error( ErrBlockCacheInternalError( "HashedLRUKCacheRequestWriteNoSlotAvailable" ) ); @@ -9384,12 +9469,12 @@ void THashedLRUKCache::RequestWrite( _In_ CRequest* const preq // update the slot corresponding to this cluster - Call( pcbs->ErrUpdateSlot( slot ) ); + Call( pcbsHash->ErrUpdateSlot( slot ) ); cbClean -= cbCachedBlock; // write the data we are caching to the cluster - Call( prequest->ErrWriteCluster( pcbs, slot, cbCachedBlock, pbCachedBlock ) ); + Call( prequest->ErrWriteCluster( pcbsHash, slot, cbCachedBlock, pbCachedBlock ) ); } } @@ -9440,8 +9525,9 @@ HandleError: } template -void THashedLRUKCache::RequestFinalizeWrite( _In_ CRequest* const prequest, - _Inout_ ICachedBlockSlab** const ppcbs ) +void THashedLRUKCache::RequestFinalizeWrite( _In_ CRequest* const prequest, + _Inout_opt_ ICachedBlockSlab** const ppcbsHash, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal ) { // if we just wrote to a sparse file then try to update its sparse map to reflect the write so that we can cache // it next time. 
it is ok if this fails (due to OOM) because that will only result in a performance drop @@ -9467,11 +9553,12 @@ void THashedLRUKCache::RequestFinalizeWrite( _In_ CRequest* const } template -ERR THashedLRUKCache::ErrCleanSlab( _In_ CRequest* const prequest, - _Inout_ ICachedBlockSlab** const ppcbs, - _In_ const BOOL fRead, - _In_ const QWORD ib, - _Inout_ QWORD* const pcbClean ) +ERR THashedLRUKCache::ErrCleanSlab( _In_ CRequest* const prequest, + _In_ ICachedBlockSlab* const pcbsHash, + _In_ const BOOL fRead, + _In_ const QWORD ib, + _Inout_opt_ ICachedBlockSlab** const ppcbsJournal, + _Inout_ QWORD* const pcbClean ) { ERR err = JET_errSuccess; @@ -9508,10 +9595,11 @@ ERR THashedLRUKCache::ErrCleanSlab( _In_ CRequest* const preq Call( CCleanSlabVisitor::ErrExecute( this, prequest, - ppcbs, + pcbsHash, fRead, cbClean + cbWriteBackFailedPrev, fOverrideCachePercentage, + ppcbsJournal, pcbClean, &cbWriteBackFailed ) ); From 1d8473543b95a22f63fd92effb1c1353b1434c12 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Fri, 11 Nov 2022 09:08:26 +0000 Subject: [PATCH 097/102] Do not do dirty cache keep alive if we are close to checkpoint too deep limit at end of recovery This means that the first operation after JetInit will not fail with CheckpointDepthTooDeep again. We could potentially optimize this by just running an iteration of checkpoint maintenance and update but that is not easy to run synchronously. 
[Substrate:e406536518b1d85abcb05d172ba6a58801e4e617] --- dev/ese/src/ese/_log/logredo.cxx | 16 ++++++++++++---- dev/ese/src/ese/db.cxx | 2 +- dev/ese/src/inc/log.hxx | 2 +- dev/ese/src/inc/space.hxx | 1 + 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index c8f06f4a..a8bbb5db 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -2388,7 +2388,7 @@ ERR LOG::ErrLGRIEndEverySession() ERR LOG::ErrLGRIEndAllSessions( const BOOL fEndOfLog, - const BOOL fKeepDbAttached, + BOOL fKeepDbAttached, const LE_LGPOS * ple_lgposRedoFrom, BYTE * pbAttach ) { @@ -2397,9 +2397,17 @@ ERR LOG::ErrLGRIEndAllSessions( BOOL fNeedCallINSTTerm = fTrue; DBID dbid; - // UNDONE: is this call needed? - // - //(VOID)ErrVERRCEClean( ); + // If we are close to the CheckpointTooDeep limit, do a clean RecoveryQuit even if asked for + // dirty cache keepalive recovery quit at the end of recovery. + if ( fKeepDbAttached && fEndOfLog ) + { + const LONG lgenTooDeepLimit = (LONG)UlParam( m_pinst, JET_paramCheckpointTooDeep ) - lgenCheckpointTooDeepMin / 2; + const LONG lgenOutstanding = m_pLogStream->GetCurrentFileGen() - LgposGetCheckpoint().le_lGeneration; + if ( lgenOutstanding >= ( lgenTooDeepLimit * 90 ) / 100 ) + { + fKeepDbAttached = fFalse; + } + } // Set current time to attached db's dbfilehdr diff --git a/dev/ese/src/ese/db.cxx b/dev/ese/src/ese/db.cxx index b56a0680..a9fa252c 100644 --- a/dev/ese/src/ese/db.cxx +++ b/dev/ese/src/ese/db.cxx @@ -5642,7 +5642,7 @@ ERR ISAMAPI ErrIsamDetachDatabase( JET_SESID sesid, IFileSystemAPI* const pfsapi // 1. For the log writer it is OK to generate a new log w/o updating the header as no log operations // for this db will be logged in new logs // 2. 
For the checkpoint: don't advance the checkpoint if db's header weren't update - Assert( pfmp->FAllowHeaderUpdate() || pfmp->FReadOnlyAttach() ); + Assert( pfmp->FAllowHeaderUpdate() || pfmp->FReadOnlyAttach() || pfmp->FAttachedForRecovery() ); pfmp->RwlDetaching().EnterAsWriter(); pfmp->ResetAllowHeaderUpdate(); pfmp->RwlDetaching().LeaveAsWriter(); diff --git a/dev/ese/src/inc/log.hxx b/dev/ese/src/inc/log.hxx index 8eaf7b02..ff49b8dc 100644 --- a/dev/ese/src/inc/log.hxx +++ b/dev/ese/src/inc/log.hxx @@ -1896,7 +1896,7 @@ private: ERR ErrLGRIEndEverySession(); ERR ErrLGRIEndAllSessions( const BOOL fEndOfLog, - const BOOL fKeepDbAttached, + BOOL fKeepDbAttached, const LE_LGPOS * plgposRedoFrom, BYTE * pbAttach ); diff --git a/dev/ese/src/inc/space.hxx b/dev/ese/src/inc/space.hxx index 04136208..80087b22 100644 --- a/dev/ese/src/inc/space.hxx +++ b/dev/ese/src/inc/space.hxx @@ -423,6 +423,7 @@ INLINE BOOL FSPExpectedError( const ERR err ) case JET_errOutOfMemory: case JET_errOutOfBuffers: case JET_errTransactionTooLong: + case JET_errCheckpointDepthTooDeep: case JET_errDiskIO: case JET_errLogWriteFail: fExpectedErr = fTrue; From 382c55307acae22a6a4740b937cb9e3273db332d Mon Sep 17 00:00:00 2001 From: Umair Ahmad Date: Fri, 11 Nov 2022 20:32:14 +0000 Subject: [PATCH 098/102] Implement BBT buffer and associated data structures. This change contains: 1. New data structures for the BBT buffer and nodes stored in it. 2. Unit tests. 3. Debug extension. 4. Fix CPAGE::RevertDbtime() to stop changing arbitrary flags. It only allows the revert of fPageScrubbed and verifies the revert against cached state. The new data structure isn't hooked up to ESE yet. Only unit tests will exercise the new code. 
[Substrate:506e0ba2405fb79a7f1753955b2d8d207c6b3824] --- CMakeLists.txt | 1 + dev/ese/published/inc/cc.hxx | 1 + dev/ese/published/inc/collection.hxx | 169 ++ dev/ese/published/inc/jethdr.w | 9 + dev/ese/published/inc/os/math.hxx | 9 +- dev/ese/published/inc/os/types.hxx | 139 +- dev/ese/src/_errstr/errdata.txt | 7 + dev/ese/src/ese/CMakeLists.txt | 1 + dev/ese/src/ese/_log/logredo.cxx | 6 +- dev/ese/src/ese/bbtbuff.cxx | 1356 +++++++++++++++++ dev/ese/src/ese/bbtbuff_test.cxx | 1151 ++++++++++++++ dev/ese/src/ese/cpage.cxx | 79 +- dev/ese/src/ese/cpage_test.cxx | 47 +- dev/ese/src/ese/dbshrink.cxx | 4 +- .../src/ese/eselibwithtests/CMakeLists.txt | 1 + dev/ese/src/ese/node.cxx | 6 +- dev/ese/src/inc/bbtbuff.hxx | 1293 ++++++++++++++++ dev/ese/src/inc/bbtbuffwriter.hxx | 775 ++++++++++ dev/ese/src/inc/ccsr.hxx | 77 +- dev/ese/src/inc/cpage.hxx | 35 +- dev/ese/src/inc/daedef.hxx | 42 + dev/ese/src/inc/esestd.hxx | 1 + dev/ese/src/inc/jettest.hxx | 3 +- dev/ese/src/inc/node.hxx | 11 +- dev/ese/src/noncore/interop/exceptions.h | 52 + dev/ese/src/os/edbg.cxx | 146 ++ 26 files changed, 5339 insertions(+), 82 deletions(-) create mode 100644 dev/ese/src/ese/bbtbuff.cxx create mode 100644 dev/ese/src/ese/bbtbuff_test.cxx create mode 100644 dev/ese/src/inc/bbtbuff.hxx create mode 100644 dev/ese/src/inc/bbtbuffwriter.hxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 8139dac7..362281d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,6 +178,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") /Gy # function-level linking /Zc:wchar_t- /Zc:forScope + /Zc:externConstexpr /Zc:inline- /GR- /TP diff --git a/dev/ese/published/inc/cc.hxx b/dev/ese/published/inc/cc.hxx index dcbc82af..5f848e3d 100644 --- a/dev/ese/published/inc/cc.hxx +++ b/dev/ese/published/inc/cc.hxx @@ -23,6 +23,7 @@ #endif +#include // // Source Annotation Language (SAL) diff --git a/dev/ese/published/inc/collection.hxx b/dev/ese/published/inc/collection.hxx index da4ed934..47cec5ec 100644 --- 
a/dev/ese/published/inc/collection.hxx +++ b/dev/ese/published/inc/collection.hxx @@ -7040,6 +7040,175 @@ inline T IrrPrev( T iCurrent, const size_t cRoundRobinBufferElements ) return ret; } +// ***************************************************** +// FixedArray helper class. +// Use with an allocator of your choice to create a runtime fixed sized array. +// For example, use with NoAlloc & _alloca to declare arrays on stack. +// + +class NoAlloc +{ +public: + // Lower-case to conform with std::allocator + // Illegal to call allocate using this allocator (externally allocated) + // Legal to call deallocate, but does nothing + BYTE* allocate( std::size_t n ) = delete; + void deallocate( BYTE* p, std::size_t n ) {}; +}; + +class HeapAllocator +{ +public: + BYTE* allocate( std::size_t n ) { return new BYTE[ n ]; } + void deallocate( BYTE* pb, std::size_t n ) { delete[] pb; } +}; + +template +class FixedArray +{ +protected: + T* m_rgT = nullptr; + int m_cItems = 0; + bool m_fOwnsArray = false; + +public: + FixedArray() = default; + FixedArray( void* pv, int cItems, bool fOwnsArray = true ) : + m_rgT( reinterpret_cast( pv ) ), + m_cItems( cItems ), + m_fOwnsArray( fOwnsArray ) + { + // Vectorized placement-new, i.e. new()[] is broken in the C++ standard + // because it can pad the placement pointer with an arbitrary number of bytes + // (e.g. vc pads it with some bytes keeping count of objects passed to new()[]). + // This leads to UB because after padding, the initialized objects overrun the allocated buffer. + // To avoid this, we have to loop and initialize each object individually, thus avoiding new()[]. + if ( fOwnsArray ) + { + for ( int i = 0; i < cItems; i++ ) + { + new( &m_rgT[ i ] ) T{}; + } + } + } + + // Needed for the copy constructor. + template + friend class FixedArray; + + // The copy constructor for this class allows copying from an array whose elements are the same type. 
+ // The allocator type doesn't matter because ownership isn't transferred with the copy. + template + FixedArray( const FixedArray& rhs ) + { + // Makes a copy, this means rhs stays intact. + // New object doesn't own the array. + m_rgT = rhs.m_rgT; + m_cItems = rhs.m_cItems; + m_fOwnsArray = false; + } + + FixedArray( FixedArray&& rhs ) + { + // Takes ownership of the array. + // rhs relinquishes control. + m_rgT = rhs.m_rgT; + m_cItems = rhs.m_cItems; + m_fOwnsArray = rhs.m_fOwnsArray; + rhs.m_rgT = NULL; + rhs.m_cItems = 0; + rhs.m_fOwnsArray = false; + } + + // Copy assignment makes a copy without transferring ownership. + template + const FixedArray& operator=( const FixedArray& rhs ) + { + Free(); + m_rgT = rhs.m_rgT; + m_cItems = rhs.m_cItems; + m_fOwnsArray = false; + return *this; + } + + // Move assignment takes ownership. + const FixedArray& operator= ( FixedArray&& rhs ) + { + std::swap( m_rgT, rhs.m_rgT ); + std::swap( m_cItems, rhs.m_cItems ); + std::swap( m_fOwnsArray, rhs.m_fOwnsArray ); + return *this; + } + +protected: + void Free() + { + if ( m_fOwnsArray ) + { + for ( int i = 0; i < m_cItems; i++ ) + { + m_rgT[ i ].~T(); + } + + TAlloc allocator; + allocator.deallocate( (BYTE*) m_rgT, sizeof( T ) * m_cItems ); + } + } + +public: + ~FixedArray() { Free(); } + int CItems() const { return m_cItems; } + T* PrgT() { return m_rgT; } + const T* PrgT() const { return m_rgT; } + const T& operator[]( int i ) const { return m_rgT[ i ]; } + T& operator[]( int i ) { return m_rgT[ i ]; } + operator const T* ( ) const { return m_rgT; } + operator T* ( ) { return m_rgT; } + FixedArray Subarray( int iBegin ) const { return Subarray( iBegin, m_cItems ); } + + // Implicit conversion to a FixedArray that doesn't own the array. + // Allows using FixedArray as a generic container for parameter passing. + operator FixedArray() const { return FixedArray( m_rgT, m_cItems, false ); } + + // Returns the range [iBegin, iEnd). + // Retains ownership of the subarray items. 
+ FixedArray Subarray( int iBegin, int iEnd ) const + { + Assert( iBegin >= 0 ); + Assert( iEnd <= m_cItems ); + Assert( iEnd >= iBegin ); + return FixedArray( &m_rgT[ iBegin ], iEnd - iBegin, false ); + } + + template + void ForEach( TFunc func ) + { + for ( int i = 0; i < m_cItems; i++ ) + { + func( m_rgT[ i ] ); // TFunc takes T& + } + } + + template + void ForEach( TFunc func ) const + { + for ( int i = 0; i < m_cItems; i++ ) + { + func( m_rgT[ i ] ); // TFunc takes const T& + } + } + + static FixedArray MakeArray( int citems ) + { + TAlloc allocator; + BYTE* ptr = allocator.allocate( sizeof( T ) * citems ); + return FixedArray( ptr, ptr != NULL ? citems : 0 ); + } +}; + +template +using FixedHeapArray = FixedArray; + NAMESPACE_END( COLL ); using namespace COLL; diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index b6a9e759..283746f1 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -6108,6 +6108,15 @@ typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( // begin_PubEsent #define JET_errPageTagCorrupted -357 // A tag / line on page is logically corrupted, offset or size is bad, or tag count on page is bad. #define JET_errNodeCorrupted -358 // A node or prefix node is logically corrupted, the key suffix size is larger than the node or line's size. +// end_PubEsent +#define errBBTBuffFull -359 /* BBT Buffer is full */ +#define errBBTNodeNotFound -360 /* Node not found in the BBT buffer */ +#define errBBTCurrencyLost -361 /* Currency on the BBT buffer couldn't be re-established */ +#define wrnBBTMergeTargetFull 362 /* Couldn't merge all the external nodes during a BBT evict operation. */ +#define wrnBBTPathUnvisitedNode 363 /* ErrBBTNextPath() / ErrBBTPrevPath() didn't switch paths because the currency should move first to an unvisited node in the current path.*/ +// begin_PubEsent +#define JET_errBBTNodeCorrupted -364 /* A property of the BBT node is logically corrupted. Or the BBT node isn't valid. 
*/ +#define JET_errBBTBuffCorrupted -365 /* A BBT buff is logically corrupted. The nodes are out of sequence or the BBT header is corrupt. */ /* RECORD MANAGER errors /**/ diff --git a/dev/ese/published/inc/os/math.hxx b/dev/ese/published/inc/os/math.hxx index f3d8ad89..37d8fd03 100644 --- a/dev/ese/published/inc/os/math.hxx +++ b/dev/ese/published/inc/os/math.hxx @@ -21,7 +21,7 @@ // Power-of-2 related helpers. template -inline bool FPowerOf2( T x ) +constexpr inline bool FPowerOf2( T x ) { return ( ( 0 < x ) && ( 0 == ( x & ( x - 1 ) ) ) ); } @@ -69,6 +69,13 @@ inline USHORT UsBits( const DWORD dw ) return (USHORT)ret; } +inline ULONG Log2OfPowerOf2( ULONG x ) +{ + ULONG index; + _BitScanForward( &index, x ); + return index; +} + #pragma warning (push) #pragma warning (disable: 4293) // '>>': shift count negative or too big, undefined behavior diff --git a/dev/ese/published/inc/os/types.hxx b/dev/ese/published/inc/os/types.hxx index 8e4b4683..5deb3d89 100644 --- a/dev/ese/published/inc/os/types.hxx +++ b/dev/ese/published/inc/os/types.hxx @@ -6,6 +6,7 @@ #include #include +#include // build options @@ -14,6 +15,11 @@ #define INLINE inline #define NOINLINE __declspec(noinline) +// Required to prevent MSVC from adding extra bytes in class layout when using multiple empty base classes. +// Note: this is the default behavior on other compilers. +// Used primarily by Unaligned Little/BigEndian template classes. +#define EMPTY_BASES __declspec(empty_bases) + #define PUBLIC extern #define LOCAL_BROKEN #define LOCAL static @@ -292,7 +298,7 @@ inline T1& operator>>=( T1& t1, const T2& t2 ) // host endian-ness -const BOOL fHostIsLittleEndian = fTrue; +constexpr BOOL fHostIsLittleEndian = fTrue; inline constexpr BOOL FHostIsLittleEndian() { return fHostIsLittleEndian; @@ -459,13 +465,13 @@ struct extract_typearg< X > using TArg = T; }; -// A base class providing operator overloads for aligned data. 
+// A base class providing arithmetic operator overloads for types that handle aligned/unaligned LittleEndian/BigEndian data. // Requires the use of CRTP pattern to invoke static polymorphism for selecting the right conversion functions. // Requires the derived class to provide conversion functions to/from an integral type. // -// Note that we don't need to worry about whether the data is aligned or not. The line: +// Note that we don't need to worry about whether the data is aligned or not, little or big endian. The line: // TArg converted = (TArg) static_cast( *this ); -// makes an aligned copy on the stack of the underlying T datatype, regardless of whether +// makes a platform endian aligned copy on the stack of the underlying T datatype, regardless of whether // this OperatorOverload template is being used as the base for aligned OR unaligned data. template class COperatorOverloads @@ -543,14 +549,36 @@ public: }; +// A helper class to create stack copies of aligned and endian-corrected data. +// Provides pointer semantics on the object to allow calling const functions, +// if the underlying type is a struct/class. +template +struct AlignedPlatformEndian +{ + T m_t; + const T* operator->() const { return &m_t; } +}; + +// Provides struct/class deref operator overload to use with unaligned, endian types encapsulating structs/classes. +// Works similarly to COperatorOverloads above. 
+template +class DerefOverload +{ + using TArg = typename extract_typearg::TArg; + +public: + auto operator->() const { return AlignedPlatformEndian{ ( TArg ) static_cast( *this ) }; } +}; + + // big endian type template template< class T > -class BigEndian : public COperatorOverloads< BigEndian > +class EMPTY_BASES BigEndian : public COperatorOverloads< BigEndian >, public DerefOverload< BigEndian > { public: - BigEndian< T >() {}; + BigEndian< T >() = default; BigEndian< T >( const BigEndian< T >& be_t ); BigEndian< T >( const T& t ); @@ -605,10 +633,10 @@ inline BigEndian< T >& BigEndian< T >::operator=( const T& t ) // little endian type template template< class T > -class LittleEndian : public COperatorOverloads< LittleEndian > +class EMPTY_BASES LittleEndian : public COperatorOverloads< LittleEndian >, public DerefOverload< LittleEndian > { public: - LittleEndian< T >() {}; + LittleEndian< T >() = default; LittleEndian< T >( const LittleEndian< T >& le_t ); LittleEndian< T >( const T& t ); @@ -673,10 +701,10 @@ inline LittleEndian< T >& LittleEndian< T >::operator=( const T& t ) #define UCAST(T) *(T PERMIT_UNALIGNED_ACCESS *) template< class T > -class Unaligned : public COperatorOverloads< Unaligned > +class EMPTY_BASES Unaligned : public COperatorOverloads< Unaligned >, public DerefOverload< Unaligned > { public: - Unaligned< T >() PERMIT_UNALIGNED_ACCESS {}; + Unaligned< T >() PERMIT_UNALIGNED_ACCESS = default; Unaligned< T >( const Unaligned< T >& u_t ) PERMIT_UNALIGNED_ACCESS; Unaligned< T >( const T& t ) PERMIT_UNALIGNED_ACCESS; @@ -730,10 +758,10 @@ inline Unaligned< T >& Unaligned< T >::operator=( const T& t ) PERMIT_UNALIGNED_ // unaligned big endian type template template< class T > -class UnalignedBigEndian : public COperatorOverloads< UnalignedBigEndian > +class EMPTY_BASES UnalignedBigEndian : public COperatorOverloads< UnalignedBigEndian >, public DerefOverload< UnalignedBigEndian > { public: - UnalignedBigEndian< T >() PERMIT_UNALIGNED_ACCESS 
{}; + UnalignedBigEndian< T >() PERMIT_UNALIGNED_ACCESS = default; UnalignedBigEndian< T >( const UnalignedBigEndian< T >& ube_t ) PERMIT_UNALIGNED_ACCESS; UnalignedBigEndian< T >( const T& t ) PERMIT_UNALIGNED_ACCESS; @@ -788,10 +816,10 @@ inline UnalignedBigEndian< T >& UnalignedBigEndian< T >::operator=( const T& t ) // unaligned little endian type template template< class T > -class UnalignedLittleEndian : public COperatorOverloads< UnalignedLittleEndian > +class EMPTY_BASES UnalignedLittleEndian : public COperatorOverloads< UnalignedLittleEndian >, public DerefOverload< UnalignedLittleEndian > { public: - UnalignedLittleEndian< T >() PERMIT_UNALIGNED_ACCESS {}; + UnalignedLittleEndian< T >() PERMIT_UNALIGNED_ACCESS = default; UnalignedLittleEndian< T >( const UnalignedLittleEndian< T >& ule_t ) PERMIT_UNALIGNED_ACCESS; UnalignedLittleEndian< T >( const T& t ) PERMIT_UNALIGNED_ACCESS; @@ -842,6 +870,89 @@ inline UnalignedLittleEndian< T >& UnalignedLittleEndian< T >::operator=( const return *this; } +// Wraps a pointer to UnalignedLittleEndian data. +// The pointer itself is aligned and platform endian. The data it points to is unaligned little endian. +// Used to manipulate arrays of unaligned little endian data. +template +class UnalignedLittleEndianPtr +{ +public: + // A reference to data pointed by UnalignedLittleEndianPtr + // Returned as a result of derefing UnalignedLittleEndianPtr + class Ref + { + public: + Ref( T PERMIT_UNALIGNED_ACCESS* ptr ) : m_pT( ptr ) {} + + operator T() const + { + // Dereference needs unaligned memory access + T t = UCAST( T )m_pT; + return ReverseBytesOnBE( t ); + } + + // Assignment operator is marked const because it doesn't modify this object, only modifies referenced data. 
+ const Ref& operator=( const T& t ) const + { + static_assert( !std::is_const::value, "Assignment to a pointer to const is not allowed" ); + UCAST( T )m_pT = ReverseBytesOnBE( t ); + return *this; + } + + private: + T PERMIT_UNALIGNED_ACCESS* m_pT; // this member itself is aligned + }; + + UnalignedLittleEndianPtr() = default; + UnalignedLittleEndianPtr( const UnalignedLittleEndianPtr& ule_t ) { m_pT = ule_t.m_pT; } + UnalignedLittleEndianPtr( T PERMIT_UNALIGNED_ACCESS* ptr ) : m_pT( ptr ){} + + Ref operator[]( int index ) { return Ref( m_pT + index ); } + Ref operator*() { return Ref( m_pT ); } + UnalignedLittleEndianPtr operator+( int index ) const { return UnalignedLittleEndianPtr( m_pT + index ); } + bool operator==( const UnalignedLittleEndianPtr& rhs ) const { return m_pT == rhs.m_pT; } + bool operator!=( const UnalignedLittleEndianPtr& rhs ) const { return m_pT != rhs.m_pT; } + + T operator[]( int index ) const + { + T t = UCAST( T )( m_pT + index ); + return ReverseBytesOnBE( t ); + } + + T operator*() const + { + // Dereference operator needs unaligned memory access. 
+ T t = UCAST( T )m_pT; + return ReverseBytesOnBE( t ); + } + + UnalignedLittleEndianPtr& operator=( const UnalignedLittleEndianPtr& ule_t ) + { + m_pT = ule_t.m_pT; + return *this; + } + + UnalignedLittleEndianPtr& operator=( T PERMIT_UNALIGNED_ACCESS* ptr ) + { + m_pT = ptr; + return *this; + } + + UnalignedLittleEndianPtr& operator++() + { + ++m_pT; + return *this; + } + + UnalignedLittleEndianPtr operator++( int ) + { + return UnalignedLittleEndianPtr( m_pT++ ); + } + +private: + T PERMIT_UNALIGNED_ACCESS* m_pT; // this member itself is aligned +}; + // special type qualifier to allow unaligned access to variables // diff --git a/dev/ese/src/_errstr/errdata.txt b/dev/ese/src/_errstr/errdata.txt index 7999b47c..169bc5d7 100644 --- a/dev/ese/src/_errstr/errdata.txt +++ b/dev/ese/src/_errstr/errdata.txt @@ -119,6 +119,13 @@ INTERNAL_WRN( 356, Unknown, wrnSPRequestSpBufRefill ) EXTERNAL_ERR( -357, Corruption, JET_errPageTagCorrupted ) EXTERNAL_ERR( -358, Corruption, JET_errNodeCorrupted ) + INTERNAL_ERR( -359, State, errBBTBuffFull ) + INTERNAL_ERR( -360, State, errBBTNodeNotFound ) + INTERNAL_ERR( -361, State, errBBTCurrencyLost ) + INTERNAL_WRN( 362, State, wrnBBTMergeTargetFull ) + INTERNAL_WRN( 363, State, wrnBBTPathUnvisitedNode ) + EXTERNAL_ERR( -364, Corruption, JET_errBBTNodeCorrupted ) + EXTERNAL_ERR( -365, Corruption, JET_errBBTBuffCorrupted ) INTERNAL_WRN( 400, Unknown, wrnFLDKeyTooBig ) INTERNAL_ERR( -401, Unknown, errFLDTooManySegments ) INTERNAL_WRN( 402, Unknown, wrnFLDNullKey ) diff --git a/dev/ese/src/ese/CMakeLists.txt b/dev/ese/src/ese/CMakeLists.txt index fb0f0af4..60f8d7c6 100644 --- a/dev/ese/src/ese/CMakeLists.txt +++ b/dev/ese/src/ese/CMakeLists.txt @@ -23,6 +23,7 @@ add_compile_definitions( set(ESE_SOURCES ${ESE_DEV}/src/ese/backup.cxx + ${ESE_DEV}/src/ese/bbtbuff.cxx ${ESE_DEV}/src/ese/bf.cxx ${ESE_DEV}/src/ese/bt.cxx ${ESE_DEV}/src/ese/callback.cxx diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 
a8bbb5db..f4a56d83 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -179,10 +179,12 @@ LOCAL ERR ErrReplacePageImage( Call( csr.ErrLoadPage( ppib, ifmp, pgno, pbBeforeImage, cb, latchWrite ) ); - // the before image of the page is logged after the dbtime was updated so we have to restore it + // the before image of the page is logged after the dbtime was updated so we have to revert it + csr.RevertDbtime( dbtimeBefore, csr.Cpage().FFlags() ); + // Its also possible we are replaying a log on an available lag on a table which was deleted and reverted with fPageFDPDelete. // We do not want to overwrite that flag. - csr.RestoreDbtime( dbtimeBefore, fPageFDPDeleteBefore ); + csr.Cpage().SetPageFDPDelete( fPageFDPDeleteBefore ); csr.Downgrade( latchRIW ); Assert( csr.Cpage().FPageFDPDelete() == fPageFDPDeleteBefore ); diff --git a/dev/ese/src/ese/bbtbuff.cxx b/dev/ese/src/ese/bbtbuff.cxx new file mode 100644 index 00000000..60144316 --- /dev/null +++ b/dev/ese/src/ese/bbtbuff.cxx @@ -0,0 +1,1356 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "std.hxx" + +BBTBuff::~BBTBuff() +{ + // Caller is responsible for managing latches. 
+} + +ERR BBTBuff::ErrEnsurePageLatched( SkipListLink link, LATCH latchType ) +{ + PageOffsetTuple pgOffset = IpgOffsetFromLink( link ); + return ErrEnsurePageLatched( pgOffset.ipg, latchType ); +} + +ERR BBTBuff::ErrEnsurePageLatched( int ipgOffset, LATCH latchType ) +{ + ERR err = JET_errSuccess; + Assert( ipgOffset >= 0 && ipgOffset < Cpg() ); + + CSR* pcsr = Pcsr( ipgOffset ); + if ( pcsr->Latch() != latchType ) + { + switch ( pcsr->Latch() ) + { + case latchNone: + { + if ( latchType != latchWrite ) + { + CallR( pcsr->ErrGetPage( m_ppib, m_ifmp, m_pcsrBase->Pgno() + ipgOffset, latchType ) ); + } + else + { + // ErrGetPage() doesn't support getting a write latch directly + CallR( pcsr->ErrGetPage( m_ppib, m_ifmp, m_pcsrBase->Pgno() + ipgOffset, latchRIW ) ); + pcsr->UpgradeFromRIWLatch(); + } + break; + } + + case latchReadTouch: + EnforceSz( false, "Upgrade from read isn't allowed." ); + err = ErrERRCheck( JET_errInternalError ); + break; + + case latchRIW: + //Assert( m_latchType == latchRIW ); + EnforceSz( latchType == latchWrite, "Unsupported latch state transition from RIW latch" ); + pcsr->UpgradeFromRIWLatch(); + break; + + case latchWrite: + // if the caller asks for RIW, and we have write, it is fine + Assert( m_latchType == latchRIW ); + EnforceSz( latchType == latchRIW, "Unsupported latch state transition from write latch" ); + break; + + default: + EnforceSz( false, "Unsupported current latch state" ); + err = ErrERRCheck( JET_errInternalError ); + } + } + + Assert( pcsr->Cpage().FBBTBuffPage() ); + Assert( ( ipgOffset > 0 ) == ( !pcsr->Cpage().FBBTBuffRootPage() ) ); // only the first page is marked with the BBTBuff root flag + Assert( m_pcsrBase->Dbtime() == pcsr->Dbtime() ); + return err; +} + +void BBTBuff::DowngradeLatches() +{ + if ( m_latchType == latchRIW || m_latchType == latchReadTouch || m_latchType == latchReadNoTouch ) + { + for ( int i = 0; i < Cpg(); i++ ) + { + CSR* pcsrCurr = Pcsr( i ); + if ( latchWrite == pcsrCurr->Latch() ) + { + 
pcsrCurr->Downgrade( m_latchType ); + } + } + } + else + { + Assert( m_latchType == latchWrite ); + } +} + +// Seeks to a particular key. +// Positions at node(s) based on SeekMode: +// - SeekMode::LEQ positions at LessThan Or Equal. +// - SeekMode::LT positions at LessThan. +// For duplicate nodes, level0 position is at the last (which is the latest) node in the sequence. +// If all nodes are greater, then positions at the first node, (which would be the oldest node in the sequence). +// Returns links and nodes at each level (which may belong to different nodes). +// SeekPos::Prev returns links/nodes to the node before the seeked node (at each level), needed for Delete. +// Note: SeekPos doesn't modify what key is seeked to, only changes whether returned links point to the seek key, +// or its predecessors. +// Returns the comparison value of the seeked key to the nearest node in the order below (based on SeekMode, SeekPos has no effect): +// 1. If there is a key equal to seeked key, result is 0. +// 2. Else if LT keys exist, returns < 0. +// 3. Else if only GT keys exist, returns > 0. +// SeekMode::LT switches the evaluation order of 1 & 2. If LT exists returns < 0, Else if EQ exists, returns = 0. +// Note that rgNodes[], rgLinks[] only return what was asked. For example, they will be null if caller asked SeekMode::LT, and we only found GEQ nodes. +ERR BBTBuff::ErrSeek_( + const KEY& key, + SeekMode seekMode, + SeekPos seekPos, + _Out_ int* piResult, + _Out_ SkipListLink rgLinks[ MAX_LEVELS ], + _Out_ SkipListNode* rgNodes[ MAX_LEVELS ] ) +{ + ERR err = JET_errSuccess; + int cmp = -1; // default value, if the list is empty + int cmpLEQ = 1; + SkipListNode* pnodePrev = NULL; + SkipListNode* pnodeCurr = NULL; + SkipListNode* pnodeNext = NULL; + SkipListLink linkNodePrev( 0 ); + SkipListLink linkNodeCurr( 0 ); + SkipListLink linkNodeNext( 0 ); + SkipListLinkArray rgLinksCurr = RgSkipListLinksHead( m_pHeader ); + + const int comparand = ( seekMode == SeekMode::LT ? 
0 : + seekMode == SeekMode::LEQ ? 1 : + -1 ); + + // Search the list for the given key + for ( int i = MAX_LEVELS - 1; i >= 0; i-- ) + { + Call( ErrPnodeFromLink_AcqLatch( rgLinksCurr[ i ], &pnodeNext ) ); + linkNodeNext = rgLinksCurr[ i ]; + while ( pnodeNext != NULL ) + { + cmp = pnodeNext->CmpKey( key ); + if ( cmp < comparand ) + { + // Next key is still behind the seeked position + // MoveNext + pnodePrev = pnodeCurr; + pnodeCurr = pnodeNext; + linkNodePrev = linkNodeCurr; + linkNodeCurr = linkNodeNext; + rgLinksCurr = pnodeCurr->RgLinksNext(); + Call( ErrPnodeFromLink_AcqLatch( rgLinksCurr[ i ], &pnodeNext ) ); + linkNodeNext = rgLinksCurr[ i ]; + cmpLEQ = cmp; + } + else + { + // Next key is after the seeked position + // So move to lower level. + break; + } + } + + // Next key is after the seeked position (or NULL). + // Seek point found at the current level. + // Now store pointers at the current level for caller. + // Null link/pnode means there is no prev node (m_pHeader->rgSkipListLinksHead[i] should still point here) + + // Drill down levels; for all lower levels where + // the next node is also the next node at the lower level. + for ( i; i >= 1; i-- ) + { + if ( seekPos == SeekPos::Prev ) + { + // Caller gets the prev node at current level. + rgLinks[ i ] = linkNodePrev; + rgNodes[ i ] = pnodePrev; + + // If we found a match at the current level, we've moved to that node (to seek to the latest in the duplicate sequence). + // But to return SeekPos::Prev, we need to find nodes sandwiched between pnodePrev and pnodeCurr at the lower level. + // We would've skipped those nodes when we were moving next at a higher level. + // To establish the correct prev node at the lower level we are drilling down to, + // we have to walk the links at the lower level from the prev node until we land on the curr node. + if ( !linkNodeCurr.FNull() ) + { + SkipListLinkArray rgLinksPrev = ( pnodePrev != NULL ? 
pnodePrev->RgLinksNext() : RgSkipListLinksHead( m_pHeader ) ); + while ( rgLinksPrev[ i - 1 ] != linkNodeCurr ) + { + // MoveNext (there is a node between prev and curr). + linkNodePrev = rgLinksPrev[ i - 1 ]; + Assert( !linkNodePrev.FNull() ); // we can't walk off the end without hitting pnodeCurr, or the skiplist is ill-formed + Call( ErrPnodeFromLink_AcqLatch( linkNodePrev, &pnodePrev ) ); + rgLinksPrev = pnodePrev->RgLinksNext(); + } + } + } + else + { + // Caller gets the current node. + rgLinks[ i ] = linkNodeCurr; + rgNodes[ i ] = pnodeCurr; + } + + if ( linkNodeNext != rgLinksCurr[ i - 1 ] ) + { + // Note that i will be decremented again once by the outer for loop. + break; + } + } + } + + // A quirk of the seek loop above, level0 remains to be set. + if ( seekPos == SeekPos::Prev ) + { + rgLinks[ 0 ] = linkNodePrev; + rgNodes[ 0 ] = pnodePrev; + } + else + { + rgLinks[ 0 ] = linkNodeCurr; + rgNodes[ 0 ] = pnodeCurr; + } + + // if pnodeCurr was found, then it was LEQ to the seek key. Then we should return the LEQ cmp value (as the seek loop would break when a comparison is greater). + // else, return cmp. If no node is found, we use cmp (the value of last comparison). + *piResult = ( cmpLEQ <= 0 ? cmpLEQ : cmp ); + +HandleError: + return err; +} + +void BBTBuff::AssertLatchedAll( LATCH latchType ) +{ +#ifdef DEBUG + for ( int i = 0; i < Cpg(); i++ ) + { + CSR* pcsr = Pcsr( i ); + Assert( pcsr->Latch() == latchType ); + } +#endif +} + +void BBTBuff::AssertReadyForWrite() +{ +#ifdef DEBUG + for ( int i = 0; i < Cpg(); i++ ) + { + CSR* pcsr = Pcsr( i ); + Assert( pcsr->Latch() == latchWrite ); + Assert( pcsr->FDirty() ); + } +#endif +} + +// Inserts a new node into the list. +// Duplicate nodes are inserted at the tail of a duplicate sequence. 
+SkipListNode* BBTBuff::PnodeInsert_( + const BBTBuffChangeContext& changeCtx, + SkipListNode* rgNodes[ MAX_LEVELS ], + BBTBuffOpcode opcode, + const KEY& key, + const DATA& data, + SkipListNodeFlags flags, + bool fDuplicate ) +{ + AssertReadyForWrite(); + + int cbNodeInsert = SkipListNode::Cb( changeCtx.level, key.Cb(), data.Cb() ); + SkipListLinkArray rgLinksHead = RgSkipListLinksHead( m_pHeader ); + SkipListLink linkNext0 = ( rgNodes[ 0 ] ? rgNodes[ 0 ]->LinkNext0() : rgLinksHead[ 0 ] ); + SkipListLink linkNew = changeCtx.linkCurr; + int level = changeCtx.level; + + Assert( cbNodeInsert <= CbMaxNodeSize() ); + + // Start modifying the header + int cbUsed = SkipListLink::Roundup( cbNodeInsert ); + m_pHeader->le_cbFree -= ( cbUsed + linkNew.ToInt() - m_pHeader->le_ibMicFree->ToInt() ); // count wasted space because of a page switch, too + EnforceSz( m_pHeader->le_cbFree >= 0, "BBTBuff: InsertOverflow" ); + SkipListLink ibMicFree = linkNew; + ibMicFree.Inc( cbUsed ); + m_pHeader->le_ibMicFree = ibMicFree; + m_pHeader->le_cNodes++; // The node is now part of the list, officially + + PageOffsetTuple pgOffsetNew = IpgOffsetFromLink( linkNew ); + + // In-place initialize the node on the page. + SkipListNode* pnodeNew = SkipListNode::Create( PbPage( pgOffsetNew.ipg ) + pgOffsetNew.ibOnPage, level, opcode, key.Cb(), data.Cb() ); + pnodeNew->SetNodeKey( key ); + pnodeNew->SetNodeData( data ); + pnodeNew->SetNodeFlags( flags ); + + // Modify the skip list + Assert( pnodeNew->IsValid() ); + pnodeNew->SetLinkPrev0( changeCtx.rgLinksPrev[ 0 ] ); + SkipListLinkArray rgLinksNew = pnodeNew->RgLinksNext(); + + // Adjust links for prev node(s) at each level + for ( int i = level; i >= 0; i-- ) + { + SkipListLinkArray rgLinksPrev = rgNodes[ i ] ? 
rgNodes[ i ]->RgLinksNext() : rgLinksHead; + rgLinksNew.SetLink( i, rgLinksPrev[ i ] ); + rgLinksPrev.SetLink( i, linkNew ); + } + + // Adjust duplicate flag of the prev node + if ( rgNodes[ 0 ] != NULL ) + { + rgNodes[ 0 ]->SetDuplicateNext0( fDuplicate ); + } + else + { + Assert( fDuplicate == false ); + } + + // Adjust m_linkPrev0 of the next node + if ( !linkNext0.FNull() ) + { + SkipListNode* pnodeNext = PnodeFromLink( linkNext0 ); + EnforceSz( changeCtx.rgLinksPrev[ 0 ] == pnodeNext->LinkPrev0(), "BBTBuffCorrupt" ); + pnodeNext->SetLinkPrev0( linkNew ); + + // We should never be inserting in the middle of a duplicate sequence. + Assert( pnodeNew->CmpKey( pnodeNext->Key() ) < 0 ); + } + +#ifdef DEBUG + // Check all the links in the inserted node are valid + SkipListNode* pnode = PnodeFromLink( pnodeNew->LinkPrev0() ); + int cmpWithPrev = ( pnode != NULL ? pnodeNew->CmpKey( pnode->Key() ) : 1 ); + Assert( cmpWithPrev >= 0 ); + Assert( fDuplicate == ( cmpWithPrev == 0 ) ); + Assert( pnode == NULL || pnode->FDuplicateNext0() == fDuplicate ); + + for ( int i = 0; i <= pnodeNew->Level(); i++ ) + { + pnode = PnodeFromLink( rgLinksNew[ i ] ); + if ( pnode != NULL ) + { + Assert( pnode->IsValid() ); + Assert( pnodeNew->CmpKey( pnode->Key() ) <= 0 ); + } + } + + Assert( !rgLinksHead[ 0 ].FNull() ); +#endif + + return pnodeNew; +} + +// Note: this is a physical delete. Currently, only used to undo a versioned operation during rollback by verstore. 
+void BBTBuff::Delete_( + BBTBuffChangeContext changeCtx, + SkipListNode* rgNodes[ MAX_LEVELS ], + const SkipListNode* pnodeToDelete ) +{ + AssertReadyForWrite(); + Assert( pnodeToDelete != NULL ); + + int level = changeCtx.level; + SkipListLink linkDel = changeCtx.linkCurr; + SkipListLink linkNext0 = pnodeToDelete->LinkNext0(); + SkipListNode* pnodeNext0 = PnodeFromLink( linkNext0 ); + + // Start modifying the pages + + if ( pnodeNext0 ) + { + pnodeNext0->SetLinkPrev0( pnodeToDelete->LinkPrev0() ); + } + + // Adjust links for prev node(s) at each level + Assert( level == pnodeToDelete->Level() ); + SkipListLinkArray rgLinksNextToDel = pnodeToDelete->RgLinksNext(); + for ( int i = pnodeToDelete->Level(); i >= 0; i-- ) + { + SkipListLinkArray rgLinksPrevToDel = ( rgNodes[ i ] != NULL ? rgNodes[ i ]->RgLinksNext() : RgSkipListLinksHead( m_pHeader ) ); + EnforceSz( rgLinksPrevToDel[ i ] == linkDel, "BBTBuffDelete_CorruptedLinks"); // the link being replaced must point to the node being deleted + rgLinksPrevToDel.SetLink( i, rgLinksNextToDel[ i ] ); + } + + // Fix duplicate flag on the prev node, if the node being deleted is a duplicate of it. + if ( rgNodes[ 0 ] != NULL && rgNodes[ 0 ]->FDuplicateNext0() ) + { + // Transfer duplicate flag to the prev node: + // - if the node being deleted has a next duplicate, then the prev node also has a next duplicate after deletion. + // - if not, then the prev node doesn't either. + rgNodes[ 0 ]->SetDuplicateNext0( pnodeToDelete->FDuplicateNext0() ); + } + + Assert( m_pHeader->le_cNodes > 0 ); + m_pHeader->le_cNodes--; + m_pHeader->le_cbFree += pnodeToDelete->Cb(); + Assert( m_pHeader->le_cbFree <= CbMax() ); +} + +// Deletes a range of nodes, starting from the given link. +// The range delete operation must start on the oldest node of a duplicate sequence, +// and end at the latest node of a duplicate sequence. +// It is an n*log(n) operation, average case. 
+void BBTBuff::RangeDelete_( SkipListLink linkFirst, int cNodes ) +{ + ERR err = JET_errSuccess; + + AssertReadyForWrite(); + ResetCurr(); + + // Check/Prepare for deletion + // All modifications are deferred until we know that delete can succeed unconditionally + SkipListNode* pnodeFirst = PnodeFromLink( linkFirst ); + SkipListNode* pnodeCurr = pnodeFirst; + SkipListLink linkCurr = linkFirst; + KEY keyFirst = pnodeFirst->Key(); + int cbDeleted = 0; + + // Assert that deleted range doesn't start or end in the middle of a duplicate sequence + // Range delete doesn't fix duplicate flags. It doesn't need to because range delete should + // always span duplicate nodes. + SkipListNode* pnodeBeforeFirst = PnodeFromLink( pnodeFirst->LinkPrev0() ); + EnforceSz( pnodeBeforeFirst == NULL || pnodeBeforeFirst->FDuplicateNext0() == false, "RangeDelete()_: PartiallyDeletingDuplicateSequence"); + Assert( cNodes > 0 ); + + // We have to seek to find previous node at each level to remove the node from the skiplist. + int result; + SkipListLink rgLinksPrev[ MAX_LEVELS ] = {}; // braces zero-initialize the array + SkipListNode* rgpNodes[ MAX_LEVELS ] = {}; + err = ErrSeek_( keyFirst, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, &result, rgLinksPrev, rgpNodes ); + EnforceSz( result <= 0 && err == JET_errSuccess, "RangeDelete_(): SeekFailed" ); + + // Store addresses of the link field of the prev node at each level. + // This field needs to be modified to point to the after-last node at each level. + //SkipListLink* rgpLinksPrev[ MAX_LEVELS ]; + //for ( int i = MAX_LEVELS; i >= 0; i-- ) + //{ + // rgpLinksPrev[ i ] = ( rgNodes[ i ] != NULL ? rgNodes[ i ]->PrgLinks() + i : &m_pHeader->rgSkipListLinksHead[ i ] ); + //} + + // Walk all nodes in the range-delete, and move links of the prev node forward step-by-step to after-last, + // at each level. 
+ SkipListLinkArray rgLinksHead = RgSkipListLinksHead( m_pHeader ); + SkipListLink rgLinksAfterLast[ MAX_LEVELS ]; + int iNode = 0; + + // Start from the current node, at each level. + for ( int i = 0; i < MAX_LEVELS; i++ ) + { + rgLinksAfterLast[ i ] = ( rgpNodes[ i ] != NULL ? rgpNodes[ i ]->RgLinksNext()[ i ] : rgLinksHead[ i ] ); + } + + while ( pnodeCurr != NULL ) + { + SkipListLinkArray rgLinksCurr = pnodeCurr->RgLinksNext(); + for ( int level = pnodeCurr->Level(); level >= 0; level-- ) + { + // Move link to the next node at the current level. + Assert( rgLinksAfterLast[ level ] == linkCurr ); + rgLinksAfterLast[ level ] = rgLinksCurr[ level ]; + } + + cbDeleted += pnodeCurr->Cb(); + linkCurr = rgLinksCurr[ 0 ]; + pnodeCurr = PnodeFromLink( linkCurr ); + iNode++; + if ( iNode >= cNodes ) + { + break; + } + } + + // Fix the prev link of the after-last node, if it exists. + if ( pnodeCurr != NULL ) + { + Assert( pnodeFirst->LinkPrev0() == rgLinksPrev[ 0 ] ); + pnodeCurr->SetLinkPrev0( rgLinksPrev[ 0 ] ); + } + + // Fix the next links at all levels to point to the after last nodes. + for ( int level = 0; level < MAX_LEVELS; level++ ) + { + ( rgpNodes[ level ] ? rgpNodes[ level ]->RgLinksNext() : rgLinksHead ).SetLink( level, rgLinksAfterLast[ level ] ); + } + + m_pHeader->le_cNodes -= cNodes; + m_pHeader->le_cbFree += cbDeleted; + + Assert( m_pHeader->le_cNodes >= 0 ); + Assert( m_pHeader->le_cbFree <= CbMax() ); +} + +// Seeks to the given key. Favors previous key if no match found. +// The cursor is placed at LessThanOrEqual, unless all nodes are greater than the key. 
+ERR BBTBuff::ErrSeekLEQ( const KEY& key ) +{ + ERR err = JET_errSuccess; + Assert( m_pcsrBase->FLatched() ); + ResetCurr(); + + int result; + SkipListLink rgLinks[ MAX_LEVELS ]; + SkipListNode* rgNodes[ MAX_LEVELS ]; + Call( ErrSeek_( key, SeekMode::LEQ, SeekPos::Curr, &result, rgLinks, rgNodes ) ); + + if ( result == 0 ) + { + Assert( rgNodes[ 0 ]->CmpKey( key ) == 0 ); + Call( ErrAssertIsLatestInDuplicateSequence( rgNodes[ 0 ] ) ); + + PageOffsetTuple pgOffset = IpgOffsetFromLink( rgLinks[ 0 ] ); + ChangeCurr( pgOffset.ipg, rgNodes[ 0 ] ); + err = JET_errSuccess; + } + else if ( rgNodes[ 0 ] != NULL ) + { + // If exact node not found, the Seek loop leaves currency immediately before the seek key + Assert( result < 0 ); + Assert( rgNodes[ 0 ]->CmpKey( key ) < 0 ); + + // We've landed on a node immediately before the seek key. The skip list guarantees + // that this is the tail of a duplicate sequence. + PageOffsetTuple pgOffset = IpgOffsetFromLink( rgLinks[ 0 ] ); + ChangeCurr( pgOffset.ipg, rgNodes[ 0 ] ); + err = ErrERRCheck( wrnNDFoundLess ); + } + else if( !RgSkipListLinksHead( m_pHeader )[ 0 ].FNull() ) + { + // If all nodes are greater than the seek key + // and there is atleast one node in the list + Assert( result > 0 ); + + PageOffsetTuple pgOffset = IpgOffsetFromLink( RgSkipListLinksHead( m_pHeader )[ 0 ] ); + SkipListNode* pnodeCurr = PnodeFromIpgOffset( pgOffset ); + Assert( pnodeCurr->CmpKey( key ) > 0 ); + + if ( !pnodeCurr->FDuplicateNext0() ) + { + ChangeCurr( pgOffset.ipg, pnodeCurr ); + } + else + { + // We have to walk the list at level0 to get to the tail of the current duplicate sequence. + // It is wasted effort if the current node's data/flags are never accessed. + // UA_TODO: For simplicity, lets see if this contract makes sense. 
+ SkipListLink linkLatest; + Call( ErrGetLatestInDuplicateSequence( pnodeCurr, &linkLatest ) ); + + if ( !linkLatest.FNull() ) + { + pgOffset = IpgOffsetFromLink( linkLatest ); + SkipListNode* pnodeLatest = PnodeFromIpgOffset( pgOffset ); + ChangeCurr( pgOffset.ipg, pnodeLatest ); + } + else + { + pgOffset = IpgOffsetFromLink( rgLinks[ 0 ] ); + ChangeCurr( pgOffset.ipg, rgNodes[ 0 ] ); + } + } + + err = ErrERRCheck( wrnNDFoundGreater ); + } + else + { + // If we aren't leaving currency at some node, BBTBuff must be empty + Assert( m_pHeader->le_cNodes == 0 ); + err = ErrERRCheck( errBBTNodeNotFound ); + } + +HandleError: + return err; +} + +// Seeks to a key that is equal to or greater than the given key. +// Returns errBBTNodeNotFound if all keys are less or the list is empty. +// Places cursor at the oldest node in the duplicate sequence. +// WARNING: This is needed to move BBTBuff nodes in an internal split. +// Don't use for establishing currency. +ERR BBTBuff::ErrSeekGEQOldest( const KEY& key ) +{ + ERR err = JET_errSuccess; + Assert( m_pcsrBase->FLatched() ); + ResetCurr(); + + int result; + SkipListLink rgLinks[ MAX_LEVELS ]; + SkipListNode* rgNodes[ MAX_LEVELS ]; + Call( ErrSeek_( key, SeekMode::LT, SeekPos::Curr, &result, rgLinks, rgNodes ) ); + + if ( result < 0 && rgNodes[ 0 ] != NULL ) + { + // LT node found, next node (if exists) will be GEQ. + Assert( rgNodes[ 0 ]->CmpKey( key ) < 0 ); + Call( ErrAssertIsLatestInDuplicateSequence( rgNodes[ 0 ] ) ); + + if ( !rgNodes[ 0 ]->LinkNext0().FNull() ) + { + PageOffsetTuple pgOffset = IpgOffsetFromLink( rgNodes[ 0 ]->LinkNext0() ); + SkipListNode* pnodeGEQ = PnodeFromIpgOffset( pgOffset ); + int cmp = pnodeGEQ->CmpKey( key ); + Call( ErrAssertIsOldestInDuplicateSequence( pnodeGEQ ) ); + + Assert( cmp >= 0 ); + ChangeCurr( pgOffset.ipg, pnodeGEQ ); + err = ( cmp > 0 ? 
ErrERRCheck( wrnNDFoundGreater ) : JET_errSuccess ); + } + else + { + err = ErrERRCheck( errBBTNodeNotFound ); + } + } + else if ( !RgSkipListLinksHead( m_pHeader )[ 0 ].FNull() ) + { + // If all nodes are GEQ than the seek key + // and there is atleast one node in the list + Assert( result >= 0 ); + + PageOffsetTuple pgOffset = IpgOffsetFromLink( RgSkipListLinksHead( m_pHeader )[ 0 ] ); + SkipListNode* pnodeGEQ = PnodeFromIpgOffset( pgOffset ); + Assert( result == pnodeGEQ->CmpKey( key ) ); + ChangeCurr( pgOffset.ipg, pnodeGEQ ); + err = ( result > 0 ? ErrERRCheck( wrnNDFoundGreater ) : JET_errSuccess ); + } + else + { + // If we aren't leaving currency at some node, BBTBuff must be empty + Assert( m_pHeader->le_cNodes == 0 ); + err = ErrERRCheck( errBBTNodeNotFound ); + } + +HandleError: + return err; +} + +ERR BBTBuff::ErrMoveFirst( SeekFlags fFlags ) +{ + ERR err = JET_errSuccess; + ResetCurr(); + + if ( !RgSkipListLinksHead( m_pHeader )[ 0 ].FNull() ) + { + PageOffsetTuple pgOffset = IpgOffsetFromLink( RgSkipListLinksHead( m_pHeader )[ 0 ] ); + CallR( ErrEnsurePageLatched( pgOffset.ipg, m_latchType ) ); + + SkipListNode* pnodeCurr = PnodeFromIpgOffset( pgOffset ); + if ( fFlags == sfSkipDuplicates ) + { + SkipListLink link( 0 ); + CallR( ErrGetLatestInDuplicateSequence( pnodeCurr, &link ) ); + if ( !link.FNull() ) + { + pgOffset = IpgOffsetFromLink( link ); + pnodeCurr = PnodeFromIpgOffset( pgOffset ); + } + } + + ChangeCurr( pgOffset.ipg, pnodeCurr ); + return err; + } + else + { + return ErrERRCheck( errBBTNodeNotFound ); + } +} + +// ErrMoveLast is a log(n) operation (average) +ERR BBTBuff::ErrMoveLast() +{ + ERR err = JET_errSuccess; + Assert( m_pcsrBase->FLatched() ); + ResetCurr(); + + if ( RgSkipListLinksHead( m_pHeader )[ 0 ].FNull() ) + { + return ErrERRCheck( errBBTNodeNotFound ); + } + + SkipListNode* pnodeCurr = NULL; + SkipListLink linkCurr( 0 ); + SkipListLinkArray rgLinks = RgSkipListLinksHead( m_pHeader ); + + for ( int i = MAX_LEVELS - 1; i >= 
0; i-- ) + { + SkipListNode* pnodeNext; + Call( ErrPnodeFromLink_AcqLatch( rgLinks[ i ], &pnodeNext ) ); + while ( pnodeNext != NULL ) + { + // MoveNext at the same level (until we reach the end node at the current level) + pnodeCurr = pnodeNext; + linkCurr = rgLinks[ i ]; + rgLinks = pnodeCurr->RgLinksNext(); + Call( ErrPnodeFromLink_AcqLatch( rgLinks[ i ], &pnodeNext ) ); + } + } + + Assert( pnodeCurr != NULL ); + Assert( rgLinks[ 0 ].FNull() ); // should be the last node + Assert( pnodeCurr->IsValid() ); + + { + PageOffsetTuple pgOffset = IpgOffsetFromLink( linkCurr ); + ChangeCurr( pgOffset.ipg, pnodeCurr ); + } + +HandleError: + return err; +} + +ERR BBTBuff::ErrMoveNext( SeekFlags fFlags ) +{ + ERR err = JET_errSuccess; + SkipListNode* pnodeCurr = m_pnodeCurr; + + if ( pnodeCurr != NULL ) + { + Assert( pnodeCurr->IsValid() ); + + if ( fFlags == sfSkipDuplicates && pnodeCurr->FDuplicateNext0() ) + { + SkipListLink linkLatest( 0 ); + Call( ErrGetLatestInDuplicateSequence( pnodeCurr, &linkLatest ) ); + + Assert( !linkLatest.FNull() ); + Call( ErrPnodeFromLink_AcqLatch( linkLatest, &pnodeCurr ) ); + } + + // The node with FDuplicateNext0 == false is the tail of the duplicate sequence + // Have to move 1 past it to get the next unique node. + + if ( !pnodeCurr->LinkNext0().FNull() ) + { + PageOffsetTuple pgOffset = IpgOffsetFromLink( pnodeCurr->LinkNext0() ); + Call( ErrEnsurePageLatched( pgOffset.ipg, m_latchType ) ); + pnodeCurr = PnodeFromIpgOffset( pgOffset ); + ChangeCurr( pgOffset.ipg, pnodeCurr ); + + if ( fFlags == sfSkipDuplicates ) + { + Call( ErrMoveToLatestInDuplicateSequence() ); + } + + goto HandleError; + } + } + + // Currency is only reset if we move beyond the end of the skip list. + // For other errors, e.g. IO errors while latching pages, currency stays + // where it is. 
+ ResetCurr(); + err = ErrERRCheck( errBBTNodeNotFound ); + +HandleError: + return err; +} + +ERR BBTBuff::ErrMovePrev( SeekFlags fFlags ) +{ + ERR err = JET_errSuccess; + SkipListNode* pnodeCurr = m_pnodeCurr; + + if ( pnodeCurr != NULL ) + { + Assert( pnodeCurr->IsValid() ); + + if ( !pnodeCurr->LinkPrev0().FNull() ) + { + SkipListNode* pnodePrev; + Call( ErrPnodeFromLink_AcqLatch( pnodeCurr->LinkPrev0(), &pnodePrev ) ); + + if ( fFlags == sfSkipDuplicates ) + { + while ( pnodePrev != NULL ) + { + if ( !pnodePrev->FDuplicateNext0() ) + { + ErrAssertIsLatestInDuplicateSequence( pnodePrev ); + break; + } + + pnodeCurr = pnodePrev; + Call( ErrPnodeFromLink_AcqLatch( pnodeCurr->LinkPrev0(), &pnodePrev ) ); + } + } + + if ( !pnodeCurr->LinkPrev0().FNull() ) + { + PageOffsetTuple pgOffset = IpgOffsetFromLink( pnodeCurr->LinkPrev0() ); + pnodeCurr = PnodeFromIpgOffset( pgOffset ); + + Assert( pnodeCurr->IsValid() ); + ChangeCurr( pgOffset.ipg, pnodeCurr ); + goto HandleError; + } + else + { + // We can only get here if MovePrev() on a duplicate set of nodes, that don't have a prev. + pnodeCurr->FDuplicateNext0(); + } + } + } + + // Currency is only reset if we move beyond the start of the skip list. + // For other errors, e.g. IO errors while latching pages, currency stays + // where it is. 
+ ResetCurr(); + err = ErrERRCheck( errBBTNodeNotFound ); + +HandleError: + return err; +} + +ERR BBTBuff::ErrGetDuplicate( _Out_ SkipListNode** ppnodeDup, _Inout_ int* piDup ) +{ + ERR err = JET_errSuccess; + SkipListNode* pnodeCurr = m_pnodeCurr; + + if ( pnodeCurr != NULL ) + { + Assert( pnodeCurr->IsValid() ); + + int i = 0; + for ( i; i < *piDup; i++ ) + { + if ( !pnodeCurr->LinkPrev0().FNull() ) + { + SkipListNode* pnodePrev; + Call( ErrPnodeFromLink_AcqLatch( pnodeCurr->LinkPrev0(), &pnodePrev ) ); + Assert( pnodePrev->IsValid() ); + + if ( pnodePrev->FDuplicateNext0() ) + { + Assert( CmpKey( pnodePrev->Key(), pnodeCurr->Key() ) == 0 ); + pnodeCurr = pnodePrev; + continue; + } + } + + break; // no more duplicates + } + + Assert( 0 == m_pnodeCurr->CmpKey( pnodeCurr->Key() ) ); + *ppnodeDup = pnodeCurr; + *piDup = i; + } + else + { + Assert( false ); + Error( ErrERRCheck( errBBTCurrencyLost ) ); + } + +HandleError: + return err; +} + +ERR BBTBuff::ErrMoveToLatestInDuplicateSequence() +{ + ERR err = JET_errSuccess; + SkipListLink linkLatest; + + Assert( m_pnodeCurr && m_pnodeCurr->IsValid() ); + CallR( ErrGetLatestInDuplicateSequence( m_pnodeCurr, &linkLatest ) ); + + if ( !linkLatest.FNull() ) + { + PageOffsetTuple pgOffset = IpgOffsetFromLink( linkLatest ); + SkipListNode* pnodeCurr = PnodeFromIpgOffset( pgOffset ); // should already be latched + ChangeCurr( pgOffset.ipg, pnodeCurr ); + } + + return err; +} + +// Moves to the most recent duplicate node of the given node, +// which is the last node of the current duplicate sequence. +// Moves forward through the sequence to find the tail of the duplicate sequence. +// Returns a null link if the current node is already the latest node in the duplicate sequence. +// NOTE: The caller is responsible for releasing latches ! 
+ERR BBTBuff::ErrGetLatestInDuplicateSequence( SkipListNode* pnodeCurr, _Out_ SkipListLink* plinkLatestDup ) +{ + ERR err = JET_errSuccess; + SkipListLink linkCurr( 0 ); + SkipListNode* pnodeNext; + + while ( pnodeCurr->FDuplicateNext0() ) + { + CallR( ErrPnodeFromLink_AcqLatch( pnodeCurr->LinkNext0(), &pnodeNext ) ); + Assert( pnodeNext != NULL ); + linkCurr = pnodeCurr->LinkNext0(); + pnodeCurr = pnodeNext; + } + + *plinkLatestDup = linkCurr; + return err; +} + +ERR BBTBuff::ErrAssertIsLatestInDuplicateSequence( const SkipListNode* pnode ) +{ + ERR err = JET_errSuccess; + +#ifdef DEBUG + // Assert that the given node is at the tail of a duplicate sequence. + // That is, it's the latest node in the duplicate sequence. + Assert( pnode->FDuplicateNext0() == false ); + + SkipListNode* pnodeNext; + CallR( ErrPnodeFromLink_AcqLatch( pnode->LinkNext0(), &pnodeNext ) ); + if ( pnodeNext != NULL ) + { + Assert( pnode->CmpKey( pnodeNext->Key() ) < 0 ); + } +#endif + + return err; +} + +ERR BBTBuff::ErrAssertIsOldestInDuplicateSequence( const SkipListNode* pnode ) +{ + ERR err = JET_errSuccess; + +#ifdef DEBUG + SkipListNode* pnodePrev; + CallR( ErrPnodeFromLink_AcqLatch( pnode->LinkPrev0(), &pnodePrev ) ); + if ( pnodePrev != NULL ) + { + Assert( !pnodePrev->FDuplicateNext0() ); + Assert( pnodePrev->CmpKey( pnode->Key() ) < 0 ); + } +#endif + + return err; +} + +#define ErrorIf( err, expr ) if ( expr ) { Error( ErrERRCheck( err ) ); } + +ERR BBTBuff::ErrValidate() +{ + ERR err = JET_errSuccess; + KEY rgKeyPrev[ MAX_LEVELS ]; + SkipListLink rgLinksPrev[ MAX_LEVELS ]; + SkipListNode* pnodeCurr; + SkipListLink linkPrev( 0 ); + int cNodes = 0; + bool fDuplicatePrev = false; + + // Check header. + ErrorIf( JET_errBBTBuffCorrupted, m_pHeader->nVersion != bbtvInitial ); + ErrorIf( JET_errBBTBuffCorrupted, m_pHeader->cMaxPages != PFormat()->cpgInBBTBuff ); + + Call( ErrLatchAll() ); + + // Check DBTIMEs on each page of the BBTBuff. They should be the same. 
+ DBTIME dbtime = m_pcsrBase->Dbtime(); + for ( int i = 0; i < Cpg() - 1; i++ ) + { + ErrorIf( JET_errBBTBuffCorrupted, dbtime != m_rgcsrLatched[ i ].Dbtime() ); + } + + // Start from header's skiplist links. + { + memset( rgKeyPrev, 0, sizeof( rgKeyPrev ) ); + SkipListLinkArray rgLinksHead = RgSkipListLinksHead( m_pHeader ); + for ( int i = 0; i < MAX_LEVELS; i++ ) + { + rgLinksPrev[ i ] = rgLinksHead[ i ]; + ErrorIf( JET_errBBTBuffCorrupted, rgLinksPrev[ i ].ToInt() >= m_pHeader->le_ibMicFree->ToInt() ); + } + } + + // Iterate over all nodes in the skiplist. + pnodeCurr = PnodeFromLink( rgLinksPrev[ 0 ] ); + while ( pnodeCurr != NULL ) + { + ErrorIf( JET_errBBTNodeCorrupted, !pnodeCurr->IsValid() ); + ErrorIf( JET_errBBTNodeCorrupted, pnodeCurr->Opcode() >= bbtOpUpsert ); // last valid opcode currently + + SkipListLink linkCurr = rgLinksPrev[ 0 ]; + KEY keyCurr = pnodeCurr->Key(); + + // Level0 checks. + if ( fDuplicatePrev ) + { + ErrorIf( JET_errBBTBuffCorrupted, CmpKey( rgKeyPrev[ 0 ], keyCurr ) != 0 ); + } + else + { + ErrorIf( JET_errBBTBuffCorrupted, CmpKey( rgKeyPrev[ 0 ], keyCurr ) >= 0 ); + } + + // If node is in order but the prev link of this node doesn't point to the prev node in sequence, + // assume this node's prev link is corrupt (hence JET_errBBTNodeCorrupted). + ErrorIf( JET_errBBTNodeCorrupted, linkPrev != pnodeCurr->LinkPrev0() ); + + // Check that same node should occupy every level of the prev link array upto the curr node level. + for ( int i = 1; i <= pnodeCurr->Level(); i++ ) + { + ErrorIf( JET_errBBTBuffCorrupted, linkCurr != rgLinksPrev[ i ] ); + } + // Since we've already checked keyCurr and link array, this means that rgKeyPrev[ level ] <= keyCurr for each level. + + cNodes++; + ErrorIf( JET_errBBTBuffCorrupted, cNodes > m_pHeader->le_cNodes ); + + // Check if current node protrudes into the unused region of the bbt buffer. 
+ int ibEndOfNode = SkipListLink::Roundup( linkCurr.ToInt() + pnodeCurr->Cb() ); + ErrorIf( JET_errBBTBuffCorrupted, ibEndOfNode > m_pHeader->le_ibMicFree->ToInt() ); + + // Move state to next node. + SkipListLinkArray rgLinks = pnodeCurr->RgLinksNext(); + for ( int i = 0; i <= pnodeCurr->Level(); i++ ) + { + rgLinksPrev[ i ] = rgLinks[ i ]; + rgKeyPrev[ i ] = keyCurr; + + // Check links of the curr node. + ErrorIf( JET_errBBTNodeCorrupted, rgLinksPrev[ i ].ToInt() >= m_pHeader->le_ibMicFree->ToInt() ); + } + + linkPrev = linkCurr; + fDuplicatePrev = pnodeCurr->FDuplicateNext0(); + pnodeCurr = PnodeFromLink( rgLinksPrev[ 0 ] ); + } + + // All links should point to null at the end. + for ( int i = 0; i < MAX_LEVELS; i++ ) + { + ErrorIf( JET_errBBTBuffCorrupted, !rgLinksPrev[ i ].FNull() ); + } + +HandleError: + return err; +} + +#undef ErrorIf + +void BBTBuff::Load( + _In_ PIB* ppib, + IFMP ifmp, + _In_ CSR* pcsr, + _In_ CSRHeapArray& rgcsrBBT, + BBTBuffHeader* pbbtbHeader, + LATCH latchType ) +{ + Assert( latchType == pcsr->Latch() ); + Assert( pbbtbHeader->nVersion == bbtvInitial ); + + // Make sure that bbtbuff isn't already loaded + Assert( m_ppib == NULL && m_ifmp == ifmpNil && m_pHeader == NULL && m_pcsrBase == NULL ); + + EnforceSz( m_cMaxPages == 0 || m_cMaxPages == pbbtbHeader->cMaxPages, "BBTBuffInvalidMaxPages" ); + m_ifmt = (BYTE) IBBTBuffFormatForPage( g_cbPage ); + Assert( BBTBUFF_FORMAT_CONSTANTS[ m_ifmt ].cpgInBBTBuff < 256 ); + + m_ppib = ppib; + m_ifmp = ifmp; + m_pHeader = pbbtbHeader; + m_latchType = latchType; + m_cMaxPages = m_pHeader->cMaxPages; + + Assert( rgcsrBBT != NULL ); + Assert( rgcsrBBT.CItems() == m_pHeader->cMaxPages - 1 ); + + m_pcsrBase = pcsr; + m_rgcsrLatched = rgcsrBBT; + + // BBTBuff doesn't insert tags/ilines. It uses the page data space as one big buffer. + // But to keep CPAGE checks happy, we can't reclaim the reserved tag. 
+ m_cbPageDataMax = CPAGE::CbPageDataMaxNoInsert( g_cbPage ); + + m_pnodeCurr = NULL; + m_ipgCurr = -1; + + EnforceSz( FIsHeaderValid(), "BBTBuffInvalidHeader" ); +} + +void BBTBuff::Unload() +{ + ResetCurr(); + m_ppib = NULL; + m_ifmp = ifmpNil; + m_pHeader = NULL; + m_pcsrBase = NULL; + m_rgcsrLatched = NULL; +} + +bool BBTBuff::FIsHeaderValid() const +{ + BYTE* pbHdr = (BYTE*) m_pHeader; + return m_pHeader != NULL && + pbHdr > PbPage( 0 ) && + pbHdr + sizeof( BBTBuffHeader ) < PbPage( 0 ) + g_cbPage && + m_pHeader->nVersion == bbtvInitial && + m_pHeader->cMaxPages == PFormat()->cpgInBBTBuff && + m_pHeader->le_cbFree >= 0 && m_pHeader->le_cbFree <= CbMax() && + m_pHeader->le_cNodes >= 0 && m_pHeader->le_cNodes < CbMax() / sizeof( SkipListNode ); +} + +bool BBTBuff::FLoaded() const +{ + Assert( m_pHeader == NULL || FIsHeaderValid() ); + return ( m_pHeader != NULL ); +} + +ERR BBTBuff::ErrLatchAll() +{ + ERR err = JET_errSuccess; + + Assert( m_pcsrBase->FLatched() ); + for ( int i = 0; i < Cpg(); i++ ) + { + CallR( ErrEnsurePageLatched( i, m_latchType ) ); + } + + return err; +} + +ERR BBTBuff::ErrWriteLatchAll() +{ + ERR err = JET_errSuccess; + + if ( latchRIW == m_pcsrBase->Latch() ) + { + m_pcsrBase->UpgradeFromRIWLatch(); + } + else if ( latchWrite != m_pcsrBase->Latch() ) + { + return ErrERRCheck( errBFLatchConflict ); + } + + for ( int i = 1; i < Cpg(); i++ ) + { + CallR( ErrEnsurePageLatched( i, latchWrite ) ); + } + + m_latchType = latchWrite; + return err; +} + +void BBTBuff::Downgrade( LATCH latchType ) +{ + Assert( m_latchType == latchWrite ); + Assert( latchType == latchReadTouch || latchType == latchRIW ); + m_latchType = latchType; + + DowngradeLatches(); +} + +ERR BBTBuff::ErrSetCurrNodeFromLink( SkipListLink link ) +{ + ERR err = JET_errSuccess; + Assert( m_pcsrBase->FLatched() ); + + if ( link.FNull() ) + { + ResetCurr(); + return ErrERRCheck( errBBTNodeNotFound ); + } + + PageOffsetTuple pgOffset = IpgOffsetFromLink( link ); + CallR( 
ErrEnsurePageLatched( pgOffset.ipg, m_latchType ) ); + + SkipListNode* pnode = PnodeFromIpgOffset( pgOffset ); + Assert( pnode->IsValid() ); + ChangeCurr( pgOffset.ipg, pnode ); + return err; +} + +bool BBTBuff::FCanInsert( const KEY& key, const DATA& data, _Out_ int* pcbReq ) +{ + // Check for level0 node. Insertion code will first try with a random level, then fallback to level0. + int cbNodeInsert = SkipListLink::Roundup( SkipListNode::Cb( 0, key.Cb(), data.Cb() ) ); + // UA_TODO: we can still hit this because BBTBuff nodes have a minimum 8-byte overhead vs 6-bytes for BTs. + EnforceSz( cbNodeInsert <= CbMaxNodeSize(), "BBTBuff MaxNodeSizeExceeded" ); + + if ( pcbReq ) + { + *pcbReq = cbNodeInsert; + } + + Assert( CbMax() - m_pHeader->le_ibMicFree->ToInt() >= 0 ); + if ( cbNodeInsert <= CbMax() - m_pHeader->le_ibMicFree->ToInt() ) + { + return true; + } + else + { + return false; + } +} + +void BBTBuff::InitBBTBuffHeader( _Out_ BBTBuffHeader* pbbtbHeader, _In_ int cMaxPages ) +{ + memset( pbbtbHeader, 0, sizeof( BBTBuffHeader ) ); + + pbbtbHeader->nVersion = bbtvInitial; + + Assert( cMaxPages < 256 ); + pbbtbHeader->cMaxPages = (BYTE) cMaxPages; + + pbbtbHeader->le_ibMicFree = SkipListLink::FromInt( sizeof( BBTBuffHeader ) ); // leave space for the header + pbbtbHeader->le_cbFree = CbMax() - pbbtbHeader->le_ibMicFree->ToInt(); + + // Fields zero-ed out: + // count + // rgSkipListLinksHead[] +} + +void BBTBuff::InitBBTBuffRoot( FUCB* pfucb, CSR* pcsr ) +{ + const BBTBuffFormat* pFormat = BBTBuff::PBBTBuffFormatForPage( g_cbPage ); + + // Init BBTBuff root page (base page, not tree root) + // + Assert( latchWrite == pcsr->Latch() ); + Assert( pcsr->FDirty() ); + pcsr->SetILine( 0 ); + + Assert( 0 == pcsr->Cpage().Clines() ); // must not have any data on the page + Assert( pcsr->Cpage().CbPageFree() > pFormat->cbBBTRoot ); + + // All of these flags should be set + const ULONG fReqPageFlagsRoot = ( CPAGE::fPageBBTBuff | CPAGE::fPageBBTBuffRoot ); + Assert( 
fReqPageFlagsRoot == ( fReqPageFlagsRoot & pcsr->Cpage().FFlags() ) ); + + int cb = sizeof( NodeResvTag ) + pFormat->cbBBTRoot; + INT itag = INDAddReservedTag( pfucb, pcsr, rtidBBTBuff, cb, 0 ); + + // The second reserved tag on the page must be the BBTBuff root tag. + EnforceSz( itag == itagReservedBBTBuffRoot, "InitBBTBuffRoot: BadReservedTag" ); + + LINE line; + GetBBTBuffRoot( *pcsr, &line ); + BBTBuff::InitBBTBuffHeader( PBBTHeader( line ), pFormat->cpgInBBTBuff ); +} + +void BBTBuff::InitBBTBuffNonRoot( CSR* pcsr ) +{ + const INT cbPageDataMax = (INT) pcsr->Cpage().CbPageDataMax(); + + // Init non-root BBTBuff pages + // + Assert( latchWrite == pcsr->Latch() ); + Assert( pcsr->FDirty() ); + + const ULONG fReqPageFlags = CPAGE::fPageBBTBuff; + Assert( fReqPageFlags == ( fReqPageFlags & pcsr->Cpage().FFlags() ) ); + + pcsr->Cpage().ResetReservedTag( 0, cbPageDataMax, 0 ); // take over external header tag +} + +void BBTBuff::GetBBTBuffRoot( const CSR& csr, LINE* pline ) +{ + Assert( csr.FLatched() ); + Assert( csr.Cpage().FBBTBuffRootPage() ); // can only be a root or internal page + + csr.Cpage().GetPtrReservedTag( itagReservedBBTBuffRoot, pline ); + NodeResvTag* pResvTag = (NodeResvTag*) pline->pv; + + Assert( rtidBBTBuff == pResvTag->resvTagId ); + Assert( pline->cb == sizeof( NodeResvTag ) + (ULONG) BBTBuff::PBBTBuffFormatForPage( g_cbPage )->cbBBTRoot ); +} + +BBTBuffHeader* BBTBuff::PBBTHeader( const LINE& line ) +{ + NodeResvTag* pResvTag = (NodeResvTag*) line.pv; + Assert( rtidBBTBuff == pResvTag->resvTagId ); + Assert( line.cb == sizeof( NodeResvTag ) + (ULONG) BBTBuff::PBBTBuffFormatForPage( g_cbPage )->cbBBTRoot ); + return (BBTBuffHeader*) pResvTag->rgb; +} + + +/////////////////////////////////////////////////////////////////////////////// +// Debug helpers + +std::string DumpBBTBuffHeader( const BBTBuffHeader* pHeader ) +{ + std::string str = ""; + char buff[ 256 ]; + + sprintf_s( buff, FORMAT_UINT( BBTBuffHeader, pHeader, nVersion, 0 ) ); + 
str.append( buff ); + + sprintf_s( buff, FORMAT_UINT( BBTBuffHeader, pHeader, cMaxPages, 0 ) ); + str.append( buff ); + + sprintf_s( buff, FORMAT_( BBTBuffHeader, pHeader, le_ibMicFree, 0 ) ); + str.append( buff ); + sprintf_s( buff, "%d (0x%X)\n", pHeader->le_ibMicFree->ToInt(), pHeader->le_ibMicFree->ToInt() ); + str.append( buff ); + + sprintf_s( buff, FORMAT_INT( BBTBuffHeader, pHeader, le_cbFree, 0 ) ); + str.append( buff ); + + sprintf_s( buff, FORMAT_UINT( BBTBuffHeader, pHeader, le_cNodes, 0 ) ); + str.append( buff ); + + return str; +} + +std::string DumpBBTBuff( const BBTBuff& bbtBuff, INT level, INT ib /* = 0 */ ) +{ + std::string str; + char buff[ 256 ]; + + str.reserve( 4096 ); // just get some initial size going + + int pgnoFirst = bbtBuff.Pcsr( 0 )->Pgno(); + int pgnoLast = bbtBuff.Pcsr( bbtBuff.Cpg() - 1 )->Pgno(); + sprintf_s( buff, "BBT Buffer Superpage spanning pgnos [%d (0x%x), %d (0x%x)]\n\n", pgnoFirst, pgnoFirst, pgnoLast, pgnoLast ); + str.append( buff ); + str.append( DumpBBTBuffHeader( bbtBuff.m_pHeader ) ); + str += '\n'; + + __try + { + SkipListLink linkCurr = ( ib == 0 ) ? RgSkipListLinksHead( bbtBuff.m_pHeader )[ 0 ] : SkipListLink::FromInt( ib ); + while ( !linkCurr.FNull() ) + { + const SkipListNode* pnodeCurr = bbtBuff.PnodeFromLink( linkCurr ); + str.append( bbtBuff.PnodeCurr() == pnodeCurr ? "-->" : " " ); + str.append( level == 0 && pnodeCurr->FDuplicateNext0() ? 
"D " : " " ); + str.append( pnodeCurr->Key().ToString() ); + sprintf_s( buff, ", Op( %d ), Level( %d ), Cb( %d ), Data( %d ) ", + pnodeCurr->Opcode(), + pnodeCurr->Level(), + pnodeCurr->Cb(), + pnodeCurr->CbData() ); + str.append( buff ); + + SkipListLink linkNext; + if ( level == 0 ) + { + linkNext = pnodeCurr->LinkNext0(); + sprintf_s( buff, "Link( %d ), Prev0( %d ), Next0( %d ): ", linkCurr.ToInt(), pnodeCurr->LinkPrev0().ToInt(), linkNext.ToInt() ); + } + else + { + int maxLevel = min( level, pnodeCurr->Level() ); + linkNext = pnodeCurr->RgLinksNext()[ maxLevel ]; + sprintf_s( buff, "Link( %d ), Next%d( %d ): ", linkCurr.ToInt(), maxLevel, linkNext.ToInt() ); + } + str.append( buff ); + + DATA data = pnodeCurr->Data(); + bool fLargeData = data.Cb() > 16; + data.SetCb( fLargeData ? 16 : data.Cb() ); + str.append( data.ToString() ); + str.append( fLargeData ? "...\n" : "\n" ); + + linkCurr = linkNext; + } + } + __except ( EXCEPTION_EXECUTE_HANDLER ) + { + sprintf_s( buff, "\n ---- Encountered expcetion: 0x%x ----\n", GetExceptionCode() ); + str.append( buff ); + } + + return str; + +// Force includes the function even if there are no calls to it. +// This allows the function to be available for debugging in VS. +#pragma comment(linker, "/include:" __FUNCDNAME__) +} + diff --git a/dev/ese/src/ese/bbtbuff_test.cxx b/dev/ese/src/ese/bbtbuff_test.cxx new file mode 100644 index 00000000..78c918b5 --- /dev/null +++ b/dev/ese/src/ese/bbtbuff_test.cxx @@ -0,0 +1,1151 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "std.hxx" +#include "bbtbuffwriter.hxx" + +#ifndef ENABLE_JET_UNIT_TEST +#error This file should only be compiled with the unit tests! 
+#endif // !ENABLE_JET_UNIT_TEST + +#define CHECK_CALL( expr ) CHECK( JET_errSuccess == ( expr ) ) + +class BBTBuffTrxLog_Test : public IBBTBuffTrxLog +{ +public: + ERR m_returnErr = JET_errSuccess; // error to return from trx log for testing + + BBTBuffTrxLog_Test( BBTBuff* pbbtbuff ) : IBBTBuffTrxLog( pbbtbuff ) + {} + + ERR ErrLogInsert( + const BBTBuffChangeContext& changeCtx, + BBTBuffOpcode opcode, + const KEY& key, + const DATA& data, + SkipListNodeFlags flags ) + { + return m_returnErr; + } + + ERR ErrLogDelete( + const BBTBuffChangeContext& changeCtx, + const SkipListNode* pnodeDel ) + { + return m_returnErr; + } + + ERR ErrLogRangeDelete( + DBTIME dbtimeBefore, + DBTIME dbtimeCurr, + SkipListLink linkFirst, + int cNodes ) + { + return m_returnErr; + } + + ERR ErrLogMergeAndDel( + DBTIME dbtimeBefore, + DBTIME dbtimeCurr, + const SkipListNode** rgNodesMerged, + SkipListLink* rgLinksDel, + int cNodesMerged, + int cNodesDel, + SkipListLink linkIbMergeStart ) + { + return m_returnErr; + } +}; + +using BBTBuffWriter_Test = BBTBuffWriter; + +class BBTBuffTestFixture +{ +public: + CSRHeapArray m_rgcsr; + BBTBuff m_bbtBuff; + BBTBuffTrxLog_Test m_bbtBuffTrxLogger; + BBTBuffWriter_Test m_bbtBuffWriter; + BBTBuffHeader* m_pbbtHeader; + + BBTBuffTestFixture() : + m_rgcsr( CSRHeapArray::MakeArray( BBTBuff::PBBTBuffFormatForPage( g_cbPage )->cpgInBBTBuff ) ), + m_bbtBuffTrxLogger( &m_bbtBuff ), + m_bbtBuffWriter( &m_bbtBuff, &m_bbtBuffTrxLogger ) + { + m_rgcsr[ 0 ].LoadNewTestPage( g_cbPage, ifmpNil ); + m_rgcsr[ 0 ].Cpage().SetFlags( m_rgcsr[ 0 ].Cpage().FFlags() | CPAGE::fPageBBTBuffRoot | CPAGE::fPageBBTBuff ); + BBTBuff::InitBBTBuffRoot( NULL, &m_rgcsr[ 0 ] ); + + for ( int i = 1; i < m_rgcsr.CItems(); i++ ) + { + m_rgcsr[ i ].LoadNewTestPage( g_cbPage, ifmpNil, m_rgcsr[ 0 ].Pgno() + i ); + m_rgcsr[ i ].Cpage().SetFlags( m_rgcsr[ i ].Cpage().FFlags() | CPAGE::fPageBBTBuff ); + BBTBuff::InitBBTBuffNonRoot( &m_rgcsr[ i ] ); + } + + LINE line; + 
BBTBuff::GetBBTBuffRoot( m_rgcsr[ 0 ], &line ); + m_pbbtHeader = BBTBuff::PBBTHeader( line ); + m_bbtBuff.Load( NULL, ifmpNil, &m_rgcsr[ 0 ], CSRHeapArray( m_rgcsr.Subarray( 1 ) ), m_pbbtHeader, latchWrite ); + } + + ERR ErrHotpointInsert( int iKeyStep, int cHotpoints, int level = -1 ) + { + ERR err; + BigEndian be_key = iKeyStep; + int iData = 0; + KEY key; + DATA data; + + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeStart + 10; + CallR( m_bbtBuffWriter.ErrUpgradeToWriteMode() ); + + int iKeyBound = ( cHotpoints + 1 ) * iKeyStep; + do + { + m_bbtBuff.SetLevelSeed( level ); // override next node level for testing (-1 means no override) + err = m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ); + be_key = be_key % iKeyBound; + be_key += iKeyStep; + iData += ( be_key == iKeyStep ? 1 : 0 ); // increment data on every wrap-around + } while ( err == JET_errSuccess ); + + return err; + } + + ERR ErrRandomInsert( int count = INT_MAX ) + { + ERR err; + BigEndian be_key = 0; + int iData = 0; + KEY key; + DATA data; + + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeStart + 10; + CallR( m_bbtBuffWriter.ErrUpgradeToWriteMode() ); + + int cInserted = 0; + do + { + be_key = rand(); + err = m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ); + iData += 10; + cInserted++; + } while ( err == JET_errSuccess && cInserted < count ); + + return err; + } + + ERR ErrDeleteKey( int ikey ) + { + ERR err; + BigEndian be_key = ikey; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + while ( JET_errSuccess == ( err = m_bbtBuffWriter.ErrDelete( key ) ) ) + {} + + return err == 
errBBTNodeNotFound ? JET_errSuccess : err; + } + + template + ERR ErrKeyForEachOldestToLatest( int ikey, TFunc func ) + { + ERR err; + BigEndian be_key = ikey; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + CallR( m_bbtBuff.ErrSeekGEQOldest( key ) ); + do + { + func( m_bbtBuff.PnodeCurr() ); + } + while ( m_bbtBuff.PnodeCurr()->FDuplicateNext0() && JET_errSuccess == ( err = m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ) ); + + return err; + } +}; + +// ================================================================ +JETUNITTEST( BBTBuff, DmlBasic ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + BigEndian be_key = 42; + int iData = 10; + KEY key; + DATA data; + + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeStart + 10; + + for ( int i = 0; i < 10; i++ ) + { + CHECK_CALL( tf.m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ) ); + be_key++; + iData += 10; + } + + be_key = 45; + iData = 40; + CHECK_CALL( tf.m_bbtBuff.ErrSeekLEQ( key ) ); + CHECK( 0 == CmpData( tf.m_bbtBuff.PnodeCurr()->Data(), data ) ); + + CHECK_CALL( tf.m_bbtBuffWriter.ErrDelete( key ) ); + CHECK( wrnNDFoundLess == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + + be_key = 44; + iData = 30; + CHECK( 0 == tf.m_bbtBuff.PnodeCurr()->CmpKey(key ) ); + CHECK( 0 == CmpData( tf.m_bbtBuff.PnodeCurr()->Data(), data ) ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, Append ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + BigEndian be_key = 0; + int iData = 0; + KEY key; + DATA data; 
+ + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeStart + 10; + CHECK_CALL( tf.m_bbtBuffWriter.ErrUpgradeToWriteMode() ); + + ERR err = JET_errSuccess; + do + { + err = tf.m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ); + CHECK( err == JET_errSuccess || err == errBBTBuffFull ); + be_key++; + iData += 10; + } + while ( err == JET_errSuccess ); + + be_key--; + wprintf( L"%d nodes inserted. ", (int) be_key ); + CHECK( tf.m_pbbtHeader->le_cNodes == (int) be_key ); + CHECK( tf.m_pbbtHeader->le_ibMicFree->ToInt() + SkipListNode::Cb( 0, key.Cb(), data.Cb() ) > tf.m_bbtBuff.CbMax() ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, HotpointInsert ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + wprintf( L"%d nodes inserted. 
", (INT) tf.m_pbbtHeader->le_cNodes ); + CHECK( tf.m_pbbtHeader->le_ibMicFree->ToInt() + SkipListNode::Cb( 0, sizeof( int ), sizeof( int ) ) > tf.m_bbtBuff.CbMax() ); +} + +JETUNITTEST( BBTBuff, InsertMaxNodeSize ) +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + KEY key; + DATA data; + unique_ptr pkey( new BYTE[ cbKeyMostMost ] ); + unique_ptr pdata( new BYTE[ g_cbPage ] ); + int cbMaxSize = tf.m_bbtBuff.CbMaxNodeSize() - SkipListNode::Cb( 0, 0, 0 ); + + memset( pkey.get(), 0xdada, cbKeyMostMost); + memset( pdata.get(), 0xd0d0, g_cbPage); + + key.Nullify(); + key.suffix.SetPv( pkey.get() ); + key.suffix.SetCb( cbKeyMostMost ); + data.Nullify(); + data.SetPv( pdata.get() ); + data.SetCb( cbMaxSize - cbKeyMostMost ); + + tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeStart + 10; + CHECK_CALL( tf.m_bbtBuffWriter.ErrUpgradeToWriteMode() ); + CHECK_CALL( tf.m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ) ); + + // Insert 1 byte more than max supported node. + // Should enforce. + bool fEnforce = false; + data.SetCb( data.Cb() + 1 ); + + __try + { + tf.m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ); + } + __except ( JetTestEnforceSEHException::Filter( GetExceptionInformation() ) ) + { + fEnforce = true; + JetTestEnforceSEHException::Cleanup(); + } + + CHECK( fEnforce ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, ScanForward ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + ERR err = JET_errSuccess; + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrRandomInsert() ); + + int nodes = tf.m_bbtBuff.CNodes(); + KEY keyPrev; + keyPrev.Nullify(); + + // Scan all nodes including duplicates. 
+ CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfAllowDuplicates ) ); + for ( int i = 0; i < nodes; i++ ) + { + SkipListNode* pnode = tf.m_bbtBuff.PnodeCurr(); + CHECK( pnode->CmpKey( keyPrev ) >= 0 ); + keyPrev = pnode->Key(); + err = tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ); + } + + wprintf( L"Scanned %d nodes. ", nodes ); + CHECK( err == errBBTNodeNotFound ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, ScanForwardUnique ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + ERR err = JET_errSuccess; + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + int iData = 0; + KEY keyPrev; + DATA data; + keyPrev.Nullify(); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + // Scan unique nodes (latest versions). + int nodes = 0; + CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfSkipDuplicates ) ); + do + { + SkipListNode* pnode = tf.m_bbtBuff.PnodeCurr(); + CHECK( pnode->CmpKey( keyPrev ) > 0 ); + CHECK( CmpData( data, pnode->Data() ) < 0 ); + keyPrev = pnode->Key(); + nodes++; + } + while( JET_errSuccess == ( err = tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfSkipDuplicates ) ) ); + + wprintf( L"Scanned %d unique nodes. 
", nodes ); + CHECK( err == errBBTNodeNotFound ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, ScanReverse ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + ERR err = JET_errSuccess; + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrRandomInsert() ); + + int nodes = tf.m_bbtBuff.CNodes(); + KEY keyPrev; + keyPrev.Nullify(); + + CHECK_CALL( tf.m_bbtBuff.ErrMoveLast() ); + for ( int i = 0; i < nodes; i++ ) + { + SkipListNode* pnode = tf.m_bbtBuff.PnodeCurr(); + CHECK( pnode->CmpKey( keyPrev ) <= 0 || keyPrev.FNull() ); + keyPrev = pnode->Key(); + err = tf.m_bbtBuff.ErrMovePrev( BBTBuff::sfAllowDuplicates ); + } + + wprintf( L"Scanned %d nodes. ", nodes ); + CHECK( err == errBBTNodeNotFound ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, ScanReverseUnique ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + ERR err = JET_errSuccess; + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + int iData = 0; + KEY keyPrev; + DATA data; + keyPrev.Nullify(); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + // Scan unique nodes (latest versions). + int nodes = 0; + CHECK_CALL( tf.m_bbtBuff.ErrMoveLast() ); + do + { + SkipListNode* pnode = tf.m_bbtBuff.PnodeCurr(); + CHECK( pnode->CmpKey( keyPrev ) < 0 || keyPrev.FNull() ); + CHECK( CmpData( data, pnode->Data() ) < 0 ); + keyPrev = pnode->Key(); + nodes++; + } while ( JET_errSuccess == ( err = tf.m_bbtBuff.ErrMovePrev( BBTBuff::sfSkipDuplicates ) ) ); + + wprintf( L"Scanned %d unique nodes. 
", nodes ); + CHECK( err == errBBTNodeNotFound ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, SeekModes ) +// ================================================================ +{ + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + BigEndian be_key; + int iData = 0; + KEY key; + DATA data; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + SkipListLink rgLinks[ MAX_LEVELS ]; + SkipListNode* rgpNodes[ MAX_LEVELS ]; + + auto testSeek = [&]( int iKey, BBTBuff::SeekMode seekMode, BBTBuff::SeekPos seekPos, int iExpectedResult, bool fNodeFound ) + { + int iSeekResult; + memset( rgLinks, 0, sizeof( rgLinks ) ); + memset( rgpNodes, 0, sizeof( rgpNodes ) ); + be_key = iKey; + + CHECK_CALL( tf.m_bbtBuff.ErrSeek_( key, seekMode, seekPos, &iSeekResult, rgLinks, rgpNodes ) ); + CHECK( iSeekResult * iExpectedResult > 0 || iSeekResult == iExpectedResult ); // both have the same sign, or they are 0 + + SkipListNode* pnodeSeeked = rgpNodes[ 0 ]; + CHECK( !!pnodeSeeked == fNodeFound ); + + if ( pnodeSeeked == NULL ) + { + CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfAllowDuplicates ) ); + pnodeSeeked = tf.m_bbtBuff.PnodeCurr(); + } + else if ( seekPos == BBTBuff::SeekPos::Prev ) + { + // Returned node is prev to the node where the seek landed. 
+ CHECK( pnodeSeeked->FDuplicateNext0() ); // each key in the test has duplicates, so prev node should always be a duplicate + CHECK_CALL( tf.m_bbtBuff.ErrSetCurrNodeFromLink( pnodeSeeked->LinkNext0() ) ); + pnodeSeeked = tf.m_bbtBuff.PnodeCurr(); + } + + int cmpActual = CmpKey( pnodeSeeked->Key(), key ); + CHECK( ( cmpActual * iExpectedResult ) > 0 || cmpActual == iExpectedResult ); // both have the same sign, or they are 0 + + (void)tf.m_bbtBuff.ErrSetCurrNodeFromLink( rgLinks[ 0 ] ); + CHECK( tf.m_bbtBuff.PnodeCurr() == rgpNodes[ 0 ] ); + + int i = 0; + if ( fNodeFound ) + { + int level = pnodeSeeked->Level(); + if ( seekPos == BBTBuff::SeekPos::Prev ) + { + SkipListLink linkSeeked = rgpNodes[ 0 ]->LinkNext0(); + for ( i = 0; i <= level; i++ ) + { + // For SeekPos::Prev, returned nodes should be strictly the prev node to seeked node + // at current level. + CHECK( linkSeeked == rgpNodes[ i ]->RgLinksNext()[ i ] ); + (void) tf.m_bbtBuff.ErrSetCurrNodeFromLink( rgLinks[ i ] ); + CHECK( tf.m_bbtBuff.PnodeCurr() == rgpNodes[ i ] ); + } + } + else + { + for ( i = 1; i <= level; i++ ) + { + CHECK( pnodeSeeked == rgpNodes[ i ] ); + CHECK( rgLinks[ i - 1 ] == rgLinks[ i ] ); + } + + CHECK( rgpNodes[ i ] != rgpNodes[ i - 1 ] ); // can't be the same node, levels are different + } + + for ( i; i < MAX_LEVELS; i++ ) + { + if ( rgpNodes[ i ] != NULL ) + { + // This node must be a prev/same node to lower level node + if ( rgpNodes[ i ] != rgpNodes[ i - 1 ] ) + { + int cmp = CmpKey( rgpNodes[ i ]->Key(), rgpNodes[ i - 1 ]->Key() ); + CHECK( cmp < 0 || ( cmp == 0 && + ( (int*) rgpNodes[ i ]->Data().Pv() ) <= ( (int*) rgpNodes[ i - 1 ]->Data().Pv() ) ) ); + } + + (void) tf.m_bbtBuff.ErrSetCurrNodeFromLink( rgLinks[ i ] ); + CHECK( tf.m_bbtBuff.PnodeCurr() == rgpNodes[ i ] ); + + (void) tf.m_bbtBuff.ErrSetCurrNodeFromLink( rgLinks[ i - 1 ] ); + CHECK( tf.m_bbtBuff.PnodeCurr() == rgpNodes[ i - 1 ] ); + } + else + { + break; + } + } + } + + for ( i; i < MAX_LEVELS; i++ ) + { + 
CHECK( rgpNodes[ i ] == NULL ); + CHECK( rgLinks[ i ] == NULL ); + } + }; + + // Seek before first key. + testSeek( 50, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, 1, false ); + testSeek( 50, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, 1, false ); + testSeek( 50, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Prev, 1, false ); + testSeek( 50, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Prev, 1, false ); + + // Seek to first key. + testSeek( 100, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, 0, false ); + testSeek( 100, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, 0, true ); + testSeek( 100, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Prev, 0, false ); + testSeek( 100, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Prev, 0, true ); // will land on older duplicate version of first key + + // Seek before some key in the middle. + testSeek( 950, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, -1, true ); + testSeek( 950, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, -1, true ); + testSeek( 950, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Prev, -1, true ); + testSeek( 950, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Prev, -1, true ); + + // Seek to some key in the middle. + testSeek( 1100, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, -1, true ); + testSeek( 1100, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, 0, true ); + testSeek( 1100, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Prev, -1, true ); + testSeek( 1100, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Prev, 0, true ); + + // Seek after last. 
+ testSeek( 1800, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, -1, true ); + testSeek( 1800, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, -1, true ); + testSeek( 1800, BBTBuff::SeekMode::LT, BBTBuff::SeekPos::Curr, -1, true ); + testSeek( 1800, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, -1, true ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, SeekLEQ ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + BigEndian be_key; + int iData = 0; + KEY key; + DATA data; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + // Seek equal. + be_key = 600; + CHECK( JET_errSuccess == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) < 0 ); // must seek to latest version + CHECK_CALL( tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) > 0 ); // latest node in the sequence means next node can't have the same key + + // Seek LT. + be_key = 599; + CHECK( wrnNDFoundLess == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + be_key = 500; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) < 0 ); + CHECK_CALL( tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) > 0 ); + + // Seek to lowest key. 
+ be_key = 0; + CHECK( wrnNDFoundGreater == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + be_key = 100; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) < 0 ); + CHECK_CALL( tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) > 0 ); + + // Seek to highest key. + be_key = INT_MAX; + CHECK( wrnNDFoundLess == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + be_key = 1700; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) < 0 ); + CHECK( errBBTNodeNotFound == tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, SeekGEQOldest ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + BigEndian be_key; + int iData = 0; + KEY key; + DATA data; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + // Seek equal. + be_key = 700; + CHECK( JET_errSuccess == tf.m_bbtBuff.ErrSeekGEQOldest( key ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) == 0 ); // must seek to oldest version + CHECK_CALL( tf.m_bbtBuff.ErrMovePrev( BBTBuff::sfAllowDuplicates ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) < 0 ); // oldest node in the sequence means prev node can't have the same key + + // Seek GT. 
+ be_key = 599; + CHECK( wrnNDFoundGreater == tf.m_bbtBuff.ErrSeekGEQOldest( key ) ); + be_key = 600; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) == 0 ); + CHECK_CALL( tf.m_bbtBuff.ErrMovePrev( BBTBuff::sfAllowDuplicates ) ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) < 0 ); + + // Seek to lowest key. + be_key = 0; + CHECK( wrnNDFoundGreater == tf.m_bbtBuff.ErrSeekGEQOldest( key ) ); + be_key = 100; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + CHECK( CmpData( data, tf.m_bbtBuff.PnodeCurr()->Data() ) == 0 ); + CHECK( errBBTNodeNotFound == tf.m_bbtBuff.ErrMovePrev( BBTBuff::sfAllowDuplicates ) ); + + // Seek to highest key. + // Expected: this will return node not found. Will not seek to a smaller key + // (unlike ErrSeekLEQ() which seeks to a GT key if no LEQ keys are found). + be_key = INT_MAX; + CHECK( errBBTNodeNotFound == tf.m_bbtBuff.ErrSeekGEQOldest( key ) ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, Delete ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + ERR err; + BigEndian be_key; + int iData = 0; + int cNodesBegin = tf.m_pbbtHeader->le_cNodes; + int cNodesDel = 0; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + // Get latest version's data. + be_key = 1400; + CHECK_CALL( tf.m_bbtBuff.ErrSeekLEQ( key ) ); + iData = *( (int*) tf.m_bbtBuff.PnodeCurr()->Data().Pv() ); + CHECK( iData != 0 ); + + // Delete all versions of a node (latest to oldest). 
+ for ( int i = iData; i > 0; i-- ) + { + CHECK_CALL( tf.m_bbtBuffWriter.ErrDelete( key ) ); + CHECK_CALL( tf.m_bbtBuff.ErrSeekLEQ( key ) ); + int iDataCurr = *( (int*) tf.m_bbtBuff.PnodeCurr()->Data().Pv() ); + CHECK( iDataCurr == i - 1 ); + cNodesDel++; + } + + // Last version. + CHECK_CALL( tf.m_bbtBuffWriter.ErrDelete( key ) ); + CHECK( wrnNDFoundLess == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + cNodesDel++; + + // Delete all versions of first node. + be_key = 100; + while ( JET_errSuccess == ( err = tf.m_bbtBuffWriter.ErrDelete( key ) ) ) + { + cNodesDel++; + } + + CHECK( err == errBBTNodeNotFound ); + CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfSkipDuplicates ) ); + be_key = 200; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + + // Delete all versions of last node. + be_key = 1700; + while ( JET_errSuccess == ( err = tf.m_bbtBuffWriter.ErrDelete( key ) ) ) + { + cNodesDel++; + } + + CHECK( err == errBBTNodeNotFound ); + CHECK_CALL( tf.m_bbtBuff.ErrMoveLast() ); + be_key = 1600; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + + wprintf( L"%d nodes deleted. ", cNodesDel ); + CHECK( tf.m_pbbtHeader->le_cNodes == cNodesBegin - cNodesDel ); + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, RangeDelete ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + BigEndian be_key; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + // Get latest version's data. 
+ be_key = 1400; + CHECK_CALL( tf.m_bbtBuff.ErrSeekGEQOldest( key ) ); + SkipListLink linkStart = tf.m_bbtBuff.GetLinkToCurrNode(); + int cNodesToDel = 1; + while ( tf.m_bbtBuff.PnodeCurr()->FDuplicateNext0() ) + { + CHECK_CALL( tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ); + cNodesToDel++; + } + + int cNodes = tf.m_pbbtHeader->le_cNodes; + CHECK_CALL( tf.m_bbtBuffWriter.ErrRangeDelete( linkStart, cNodesToDel ) ); + CHECK( cNodes - cNodesToDel == tf.m_pbbtHeader->le_cNodes ); + + CHECK( wrnNDFoundLess == tf.m_bbtBuff.ErrSeekLEQ( key ) ); + be_key = 1300; + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + be_key = 1500; + tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + + // RangeDelete from 1500 to end. + linkStart = tf.m_bbtBuff.GetLinkToCurrNode(); + cNodesToDel = 1; + while ( JET_errSuccess == tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfAllowDuplicates ) ) + { + cNodesToDel++; + } + + cNodes = tf.m_pbbtHeader->le_cNodes; + CHECK_CALL( tf.m_bbtBuffWriter.ErrRangeDelete( linkStart, cNodesToDel ) ); + CHECK( cNodes - cNodesToDel == tf.m_pbbtHeader->le_cNodes ); + + be_key = 1300; + CHECK_CALL( tf.m_bbtBuff.ErrMoveLast() ); + CHECK( tf.m_bbtBuff.PnodeCurr()->CmpKey( key ) == 0 ); + wprintf( L"%d nodes deleted. ", cNodesToDel ); + + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); +} + +// ================================================================ +JETUNITTESTEX( BBTBuff, Reogranize, JetSimpleUnitTest::dwBufferManager ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + + BigEndian be_key; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + // Delete all versions of a node (latest to oldest). 
+ int cNodesDel = tf.m_pbbtHeader->le_cNodes; + CHECK_CALL( tf.ErrDeleteKey( 1400 ) ); + cNodesDel -= tf.m_pbbtHeader->le_cNodes; + CHECK( cNodesDel > 0 ); + + int ibMicFree = tf.m_pbbtHeader->le_ibMicFree->ToInt(); + CHECK_CALL( tf.m_bbtBuffWriter.ErrReorganize() ); + + int ibMicFreeNew = tf.m_pbbtHeader->le_ibMicFree->ToInt(); + constexpr int cbSmallestNode = SkipListNode::Cb( 0, sizeof( int ), sizeof( int ) ); + + // Reorganize() should atleast free up this much space, assuming every node was level0 (least overhead). + CHECK( ibMicFreeNew + cNodesDel * cbSmallestNode <= ibMicFree ); + wprintf( L"Freed up %d bytes. ", ibMicFree - ibMicFreeNew ); + + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); +} + + +class MergeAndDelTestFixture : public JetTestFixture +{ +public: + BBTBuffTestFixture tf; + BBTBuffTestFixture tfFrom; + FixedHeapArray rgLinksToDel; + FixedHeapArray rgpNodesToMerge; + int cLinksToDel = 0; + int cNodesToMerge = 0; + + MergeAndDelTestFixture( JetUnitTestResult* presult ) : JetTestFixture( presult ) + {} + + void Prepare() + { + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16 ) ); + CHECK( errBBTBuffFull == tfFrom.ErrHotpointInsert( 100, 20 ) ); + + BigEndian be_key; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + // Make some room by marking some nodes for deletion. 
+ be_key = 1400; + CHECK_CALL( tf.m_bbtBuff.ErrSeekLEQ( key ) ); + int cNodesPerKey = 1 + *( (int*) tf.m_bbtBuff.PnodeCurr()->Data().Pv() ); + + rgLinksToDel = FixedHeapArray::MakeArray( cNodesPerKey * 3 ); + auto addLinkToDel = [this]( SkipListNode* pnode ) + { + rgLinksToDel[ cLinksToDel++ ] = tf.m_bbtBuff.GetLinkToCurrNode(); + }; + + CHECK_CALL( tf.ErrKeyForEachOldestToLatest( 1000, addLinkToDel ) ); + cLinksToDel--; // deliberately leave 1 node from this key behind + CHECK_CALL( tf.ErrKeyForEachOldestToLatest( 1300, addLinkToDel ) ); + cLinksToDel--; + CHECK_CALL( tf.ErrKeyForEachOldestToLatest( 1500, addLinkToDel ) ); + cLinksToDel--; + + CHECK( cLinksToDel <= cNodesPerKey * 3 ); + + // Get nodes to merge. + rgpNodesToMerge = FixedHeapArray::MakeArray( cNodesPerKey * 5 ); + auto addNodesToMerge = [this]( SkipListNode* pnode ) + { + rgpNodesToMerge[ cNodesToMerge++ ] = pnode; + }; + + // Gather more nodes than the amount that can fit. + CHECK_CALL( tfFrom.ErrKeyForEachOldestToLatest( 100, addNodesToMerge ) ); + CHECK_CALL( tfFrom.ErrKeyForEachOldestToLatest( 500, addNodesToMerge ) ); + CHECK_CALL( tfFrom.ErrKeyForEachOldestToLatest( 1000, addNodesToMerge ) ); + CHECK_CALL( tfFrom.ErrKeyForEachOldestToLatest( 1500, addNodesToMerge ) ); + CHECK_CALL( tfFrom.ErrKeyForEachOldestToLatest( 1900, addNodesToMerge ) ); + } + + // This class derives from JetTestFixture just to make CHECK() macros work. + // Setup_()/Teardown_() isn't used. 
+protected: + bool SetUp_() { return false; } + void TearDown_() {} +}; + +// ================================================================ +JETUNITTESTEX( BBTBuff, MergeAndDel, JetSimpleUnitTest::dwBufferManager ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + MergeAndDelTestFixture md( m_presult ); + md.Prepare(); + + int cNodesBefore = md.tf.m_pbbtHeader->le_cNodes; + int cNodesMerged = 0; + int cNodesDeleted = 0; + ERR err = md.tf.m_bbtBuffWriter.ErrMergeAndDelNodes( + md.rgpNodesToMerge.PrgT(), + md.rgLinksToDel.PrgT(), + md.cNodesToMerge, + md.cLinksToDel, + &cNodesMerged, + &cNodesDeleted, + 0 ); + + wprintf( L"%d nodes merged, %d deleted. ", cNodesMerged, cNodesDeleted ); + CHECK( err == wrnBBTMergeTargetFull ); + CHECK( cNodesMerged <= md.cNodesToMerge ); + CHECK( cNodesDeleted == md.cLinksToDel ); + CHECK( cNodesBefore + cNodesMerged - cNodesDeleted == md.tf.m_pbbtHeader->le_cNodes ); + CHECK_CALL( md.tf.m_bbtBuff.ErrValidate() ); +} + +// ================================================================ +JETUNITTESTEX( BBTBuff, MergeAndDelNoSpace, JetSimpleUnitTest::dwBufferManager ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + BBTBuffTestFixture tfFrom; + CHECK( errBBTBuffFull == tf.ErrHotpointInsert( 100, 16, 0 ) ); // takes a long time because of n^2 runtime. + CHECK( errBBTBuffFull == tfFrom.ErrHotpointInsert( 100, 20 ) ); + + BigEndian be_key; + KEY key; + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + + // Make some room by marking some nodes for deletion. 
+ int cLinksToDel = 10; + unique_ptr rgLinksToDel( new SkipListLink[ cLinksToDel ] ); + + CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfSkipDuplicates ) ); + for ( int i = 0; i < cLinksToDel; i++ ) + { + rgLinksToDel[ i ] = tf.m_bbtBuff.GetLinkToCurrNode(); + CHECK_CALL( tf.m_bbtBuff.ErrMoveNext( BBTBuff::sfSkipDuplicates ) ); + } + + // Get nodes to merge. + int cNodesToMerge = 10; + unique_ptr rgpNodesToMerge( new const SkipListNode * [ cNodesToMerge ] ); + CHECK_CALL( tfFrom.m_bbtBuff.ErrMoveFirst( BBTBuff::sfSkipDuplicates ) ); + for ( int i = 0; i < cNodesToMerge; i++ ) + { + rgLinksToDel[ i ] = tfFrom.m_bbtBuff.GetLinkToCurrNode(); + CHECK_CALL( tfFrom.m_bbtBuff.ErrMoveNext( BBTBuff::sfSkipDuplicates ) ); + } + + int cNodesBefore = tf.m_pbbtHeader->le_cNodes; + int cNodesMerged = 0; + int cNodesDeleted = 0; + ERR err = tf.m_bbtBuffWriter.ErrMergeAndDelNodes( + rgpNodesToMerge.get(), + rgLinksToDel.get(), + cNodesToMerge, + cLinksToDel, + &cNodesMerged, + &cNodesDeleted, + 0 ); + + CHECK( err == errBBTBuffFull ); + CHECK( cNodesMerged == 0 ); + CHECK( cNodesDeleted == 0 ); + CHECK( cNodesBefore + cNodesMerged - cNodesDeleted == tf.m_pbbtHeader->le_cNodes ); + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, Dump ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + CHECK( JET_errSuccess == tf.ErrRandomInsert( 10 ) ); + + std::string szDump = DumpBBTBuff( tf.m_bbtBuff, 0 ); + wprintf( L"\n%hs\n", szDump.c_str() ); +} + + +//////////////////////////////////////////////////////////////////// +// Negative tests. Tests with failed dml because of failed trx log. 
+ +// ================================================================ +JETUNITTEST( BBTBuff, FailedInsert ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + BigEndian be_key = 42; + int iData = 10; + KEY key; + DATA data; + + key.Nullify(); + key.suffix.SetPv( &be_key ); + key.suffix.SetCb( sizeof( be_key ) ); + data.Nullify(); + data.SetPv( &iData ); + data.SetCb( sizeof( iData ) ); + + CHECK_CALL( tf.ErrRandomInsert( 10 ) ); + + DBTIME dbtimeInitial = tf.m_rgcsr[ 0 ].Dbtime(); + tf.m_bbtBuffTrxLogger.m_returnErr = ErrERRCheck( JET_errLogWriteFail ); + tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeInitial + 10; + CHECK( JET_errLogWriteFail == tf.m_bbtBuffWriter.ErrInsert( bbtOpInsert, key, data, fSLNNone ) ); + + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); + CHECK( 10 == tf.m_pbbtHeader->le_cNodes ); + tf.m_rgcsr.ForEach( [this, dbtimeInitial]( const CSR& csr ) + { + CHECK( csr.Dbtime() == dbtimeInitial ); + } ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, FailedDelete ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + KEY key; + DATA data; + + CHECK_CALL( tf.ErrRandomInsert( 10 ) ); + CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfSkipDuplicates ) ); + + key = tf.m_bbtBuff.PnodeCurr()->Key(); + data = tf.m_bbtBuff.PnodeCurr()->Data(); + + DBTIME dbtimeInitial = tf.m_rgcsr[ 0 ].Dbtime(); + tf.m_bbtBuffTrxLogger.m_returnErr = ErrERRCheck( JET_errLogWriteFail ); + tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeInitial + 10; + CHECK( JET_errLogWriteFail == tf.m_bbtBuffWriter.ErrDelete( key ) ); + + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); + CHECK( 10 == tf.m_pbbtHeader->le_cNodes ); + tf.m_rgcsr.ForEach( [this, dbtimeInitial]( const CSR& csr ) + { + CHECK( 
csr.Dbtime() == dbtimeInitial ); + } ); +} + +// ================================================================ +JETUNITTEST( BBTBuff, FailedRangeDelete ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + BBTBuffTestFixture tf; + + CHECK_CALL( tf.ErrRandomInsert( 10 ) ); + CHECK_CALL( tf.m_bbtBuff.ErrMoveFirst( BBTBuff::sfSkipDuplicates ) ); + + SkipListLink linkFirst = tf.m_bbtBuff.GetLinkToCurrNode(); + DBTIME dbtimeInitial = tf.m_rgcsr[ 0 ].Dbtime(); + tf.m_bbtBuffTrxLogger.m_returnErr = ErrERRCheck( JET_errLogWriteFail ); + tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeInitial + 10; + CHECK( JET_errLogWriteFail == tf.m_bbtBuffWriter.ErrRangeDelete( linkFirst, 5 ) ); + + CHECK_CALL( tf.m_bbtBuff.ErrValidate() ); + CHECK( 10 == tf.m_pbbtHeader->le_cNodes ); + tf.m_rgcsr.ForEach( [this, dbtimeInitial]( const CSR& csr ) + { + CHECK( csr.Dbtime() == dbtimeInitial ); + } ); +} + +// ================================================================ +JETUNITTESTEX( BBTBuff, FailedMergeAndDel, JetSimpleUnitTest::dwBufferManager ) +// ================================================================ +{ + SetParam( pinstNil, ppibNil, JET_paramDatabasePageSize, 32768, NULL ); + + MergeAndDelTestFixture md( m_presult ); + md.Prepare(); + + DBTIME dbtimeInitial = md.tf.m_rgcsr[ 0 ].Dbtime(); + md.tf.m_bbtBuffTrxLogger.m_dbtimeCoordinated = dbtimeInitial + 10; + md.tf.m_bbtBuffTrxLogger.m_returnErr = ErrERRCheck( JET_errLogWriteFail ); + + int cNodesBefore = md.tf.m_pbbtHeader->le_cNodes; + int cNodesMerged = 0; + int cNodesDeleted = 0; + ERR err = md.tf.m_bbtBuffWriter.ErrMergeAndDelNodes( + md.rgpNodesToMerge.PrgT(), + md.rgLinksToDel.PrgT(), + md.cNodesToMerge, + md.cLinksToDel, + &cNodesMerged, + &cNodesDeleted, + 0 ); + + wprintf( L"%d nodes merged, %d deleted. 
", cNodesMerged, cNodesDeleted ); + CHECK( err == JET_errLogWriteFail ); + CHECK( cNodesMerged == 0 ); + CHECK( cNodesDeleted == 0 ); + CHECK( cNodesBefore == md.tf.m_pbbtHeader->le_cNodes ); + CHECK_CALL( md.tf.m_bbtBuff.ErrValidate() ); + md.tf.m_rgcsr.ForEach( [this, dbtimeInitial]( const CSR& csr ) + { + CHECK( csr.Dbtime() == dbtimeInitial ); + } ); +} + diff --git a/dev/ese/src/ese/cpage.cxx b/dev/ese/src/ese/cpage.cxx index fc6d5115..f52fae34 100644 --- a/dev/ese/src/ese/cpage.cxx +++ b/dev/ese/src/ese/cpage.cxx @@ -2568,6 +2568,12 @@ ERR CPAGE::ErrLoadPage( UtilMemCpy( m_bfl.pv, pv, cb ); + // A loaded page is considered dirty-on-load. So save the scrub state. + // For a loaded page, scrub state before dirty isn't available. + // So RevertDbtime() will leave the scrub state unchanged. + // Note that this is consistent with behavior prior to the bug fix for reverting scrub flag. + m_fPageScrubbedPrevSet = fTrue; + m_fPageScrubbedPrev = !!FScrubbed(); return JET_errSuccess; } @@ -2602,7 +2608,7 @@ VOID CPAGE::LoadNewPage( #ifdef ENABLE_JET_UNIT_TEST // ================================================================ -VOID CPAGE::LoadNewTestPage( _In_ const ULONG cb, _In_ const IFMP ifmp ) +VOID CPAGE::LoadNewTestPage( _In_ const ULONG cb, _In_ const IFMP ifmp, const PGNO pgno /* = 42 */ ) // ================================================================ { Assert( 0 != cb ); @@ -2625,7 +2631,7 @@ VOID CPAGE::LoadNewTestPage( _In_ const ULONG cb, _In_ const IFMP ifmp ) // Initialize the Page so it is usable for testing // - PreInitializeNewPage_( ppibNil, ifmp, 2, 3, 0 ); + PreInitializeNewPage_( ppibNil, ifmp, pgno, 3, 0 ); ConsumePreInitPage( 0x0 ); // Avoid Uninitialized Page issues @@ -2979,6 +2985,7 @@ VOID CPAGE::RevertDbtime( const DBTIME dbtime, const ULONG fFlags ) Expected( fFlags != 0 ); m_platchManager->AssertPageIsDirty( m_bfl ); Assert( FAssertWriteLatch( ) ); + Assert( m_fPageScrubbedPrevSet ); // prev fPageScrubbed state must've been captured 
on dirty. ((PGHDR*)m_bfl.pv)->dbtimeDirtied = dbtime; @@ -2986,33 +2993,23 @@ VOID CPAGE::RevertDbtime( const DBTIME dbtime, const ULONG fFlags ) // state with the way scrubbed was implemented embedded in ::Dirty() and ::DirtyForScrub(). // Still, we don't expect any other flags to change other than fPageScrubbed. // - // Its also possible we are replaying a log on an available lag on a table which was deleted and reverted with fPageFDPDelete. - // We do not want to overwrite that flag. So we might be restoring that flag. - // // If the FireWall() below goes off, it doesn't necessarily mean we have // a corruption problem, but it means there will be a divergence between // copies in a replicated system that may triger a DB divergence error. #ifndef ENABLE_JET_UNIT_TEST - if ( ( FFlags() | fPageScrubbed ) != ( fFlags | fPageScrubbed ) && - ( FFlags() | fPageFDPDelete ) != ( fFlags | fPageFDPDelete ) && - ( FFlags() | fPageScrubbed | fPageFDPDelete ) != ( fFlags | fPageScrubbed | fPageFDPDelete ) ) + if ( ( FFlags() | fPageScrubbed ) != ( fFlags | fPageScrubbed ) ) { FireWall( OSFormat( "RevertDbtime:0x%I32x:0x%I32x", fFlags, FFlags() ) ); } -#endif - const BOOL fScrubbedBefore = ( fFlags & fPageScrubbed ); - if ( !FScrubbed() != !fScrubbedBefore ) - { - SetFScrubbedValue_( fScrubbedBefore ); - } - // If existing root page had been marked for FDP delete but current root page isn't, mark it again. 
- const BOOL fPageFDPDeleteBefore = ( fFlags & fPageFDPDelete ); - if ( fPageFDPDeleteBefore && !FPageFDPDelete() ) + const ULONG fScrubbedBefore = ( fFlags & fPageScrubbed ); + if ( m_fPageScrubbedPrev != fScrubbedBefore ) { - Assert( FRootPage() ); - SetPageFDPDelete( fPageFDPDeleteBefore ); + FireWall( OSFormat( "RevertDbtime(fPageScrubbed):0x%I32x:0x%I32x", fScrubbedBefore, m_fPageScrubbedPrev ) ); } +#endif + + SetFScrubbedValue_( m_fPageScrubbedPrev ); } @@ -3387,6 +3384,35 @@ VOID CPAGE::ReplaceReservedTag( INT itag, const DATA* rgdata, INT cdata ) Replace_( itag, rgdata, cdata, 0 ); } +// ================================================================ +VOID CPAGE::ResetReservedTag( INT itag, INT cb, BYTE fill ) +// ================================================================ +// +// Sets size and pattern-fills a reserved tag. +// +//- +{ + Assert( itag < CTagReserved_() ); + + const BOOL fSmallFormat = FSmallPageFormat(); + PGHDR* ppghdr = (PGHDR*) m_bfl.pv; + TAG* ptag = PtagFromItag_( itag ); + + // Release space. Stored data will be lost ! 
+ ppghdr->ibMicFree += ptag->Cb( fSmallFormat ); + ptag->SetIb( this, 0 ); + ptag->SetCb( this, 0 ); + FreeSpace_( cb ); + + ptag->SetIb( this, ppghdr->ibMicFree ); + ppghdr->ibMicFree = USHORT( ppghdr->ibMicFree + cb ); + ptag->SetCb( this, (USHORT) cb ); + const USHORT cbFree = (USHORT) ( ppghdr->cbFree - cb ); + ppghdr->cbFree = cbFree; + + BYTE* pb = PbFromIb_( ptag->Ib( fSmallFormat ) ); + memset( pb, fill, ptag->Cb( fSmallFormat ) ); +} // ================================================================ bool CPAGE::FResvTagFormatEnabled() @@ -3429,6 +3455,8 @@ VOID CPAGE::ReleaseWriteLatch( BOOL fTossImmediate ) m_objidPreInit = objidNil; } + m_fPageScrubbedPrevSet = fFalse; + m_platchManager->ReleaseWriteLatch( &m_bfl, !!fTossImmediate ); Abandon_(); Assert( FAssertUnused_( ) ); @@ -3446,6 +3474,7 @@ VOID CPAGE::ReleaseRDWLatch( BOOL fTossImmediate ) ASSERT_VALID( this ); DebugCheckAll(); + m_fPageScrubbedPrevSet = fFalse; m_platchManager->ReleaseRDWLatch( &m_bfl, !!fTossImmediate ); Abandon_(); Assert( FAssertUnused_( ) ); @@ -3466,6 +3495,7 @@ VOID CPAGE::ReleaseReadLatch( BOOL fTossImmediate ) DebugCheckAll(); #endif // DEBUG_PAGE + m_fPageScrubbedPrevSet = fFalse; m_platchManager->ReleaseReadLatch( &m_bfl, !!fTossImmediate ); Abandon_(); Assert( FAssertUnused_( ) ); @@ -3808,6 +3838,12 @@ VOID CPAGE::Dirty_( const BFDirtyFlags bfdf ) // for now, but someday in the future we may allow dirty small pages Assert( FIsNormalSized() ); + if ( !m_fPageScrubbedPrevSet ) + { + m_fPageScrubbedPrevSet = fTrue; + m_fPageScrubbedPrev = !!FScrubbed(); + } + if( FLoadedPage() ) { } @@ -6937,6 +6973,11 @@ ERR CPAGE::DumpHeader( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const (*pcprintf)( "\t\tRoot page\n" ); } + if ( FBBTBuffPage() ) + { + (*pcprintf)( "\t\tBBT %sPage", FBBTBuffRootPage() ? 
"Base " : "" ); + } + BOOL fNewExtHdrFormat = fFalse; BYTE fNodeFlag = 0; if( FFDPPage() ) diff --git a/dev/ese/src/ese/cpage_test.cxx b/dev/ese/src/ese/cpage_test.cxx index 7ab18b08..f8ffb7c0 100644 --- a/dev/ese/src/ese/cpage_test.cxx +++ b/dev/ese/src/ese/cpage_test.cxx @@ -2293,6 +2293,7 @@ void CPageTestFixture::TestInternalTest() void CPageTestFixture::TestRevertDbtimeCheckDbtime() // ================================================================ { + m_cpage.Dirty_( bfdfDirty ); m_cpage.SetDbtime( 1 ); CHECK( 1 == m_cpage.Dbtime() ); m_cpage.SetDbtime( 2 ); @@ -2317,6 +2318,7 @@ void CPageTestFixture::TestRevertDbtimeCheckFlags() m_cpage.SetFlags( fFlagsAfter ); CHECK( !m_cpage.FScrubbed() ); CHECK( fFlagsAfter == m_cpage.FFlags() ); + m_cpage.Dirty_( bfdfDirty ); m_cpage.SetDbtime( 2 ); CHECK( 2 == m_cpage.Dbtime() ); m_cpage.RevertDbtime( 1, fFlagsBefore ); @@ -2324,29 +2326,43 @@ void CPageTestFixture::TestRevertDbtimeCheckFlags() CHECK( !m_cpage.FScrubbed() ); CHECK( fFlagsAfter == m_cpage.FFlags() ); + // Check scrub state is set/released correctly + CHECK( m_cpage.m_fPageScrubbedPrevSet ); + m_cpage.ReleaseReadLatch(); + CHECK( !m_cpage.m_fPageScrubbedPrevSet ); + + m_cpage.LoadNewPage( m_ifmp, m_pgno, m_objidFDP, m_fFlags, m_pvPage, CbPage_() ); + // Scrub is unset and changes. 
- fFlagsBefore = CPAGE::fPageLeaf | CPAGE::fPageLongValue; + fFlagsBefore = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot; m_cpage.SetFlags( fFlagsBefore ); CHECK( !m_cpage.FScrubbed() ); CHECK( fFlagsBefore == m_cpage.FFlags() ); - fFlagsAfter = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot | CPAGE::fPageScrubbed; - m_cpage.SetFlags( fFlagsAfter ); - CHECK( m_cpage.FScrubbed() ); - CHECK( fFlagsAfter == m_cpage.FFlags() ); + fFlagsAfter = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot; + m_cpage.Dirty_( bfdfDirty ); + m_cpage.SetFScrubbed_(); m_cpage.SetDbtime( 2 ); CHECK( 2 == m_cpage.Dbtime() ); m_cpage.RevertDbtime( 1, fFlagsBefore ); CHECK( 1 == m_cpage.Dbtime() ); CHECK( !m_cpage.FScrubbed() ); - CHECK( ( fFlagsAfter & ~CPAGE::fPageScrubbed ) == m_cpage.FFlags() ); + CHECK( fFlagsAfter == m_cpage.FFlags() ); + + // Check scrub state is set/released correctly + CHECK( m_cpage.m_fPageScrubbedPrevSet ); + m_cpage.ReleaseRDWLatch(); + CHECK( !m_cpage.m_fPageScrubbedPrevSet ); + + m_cpage.LoadNewPage( m_ifmp, m_pgno, m_objidFDP, m_fFlags, m_pvPage, CbPage_() ); // Scrub is set and doesn't change. 
- fFlagsBefore = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageScrubbed; + fFlagsBefore = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot | CPAGE::fPageScrubbed; m_cpage.SetFlags( fFlagsBefore ); CHECK( m_cpage.FScrubbed() ); CHECK( fFlagsBefore == m_cpage.FFlags() ); fFlagsAfter = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot | CPAGE::fPageScrubbed; - m_cpage.SetFlags( fFlagsAfter ); + m_cpage.Dirty_( bfdfDirty ); + m_cpage.SetFScrubbed_(); CHECK( m_cpage.FScrubbed() ); CHECK( fFlagsAfter == m_cpage.FFlags() ); m_cpage.SetDbtime( 2 ); @@ -2356,21 +2372,24 @@ void CPageTestFixture::TestRevertDbtimeCheckFlags() CHECK( m_cpage.FScrubbed() ); CHECK( fFlagsAfter == m_cpage.FFlags() ); + // Check scrub state is set/released correctly + CHECK( m_cpage.m_fPageScrubbedPrevSet ); + m_cpage.ReleaseWriteLatch(); + CHECK( !m_cpage.m_fPageScrubbedPrevSet ); + + m_cpage.LoadNewPage( m_ifmp, m_pgno, m_objidFDP, m_fFlags, m_pvPage, CbPage_() ); + // Scrub is set and changes. 
- fFlagsBefore = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageScrubbed; + fFlagsBefore = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot | CPAGE::fPageScrubbed; m_cpage.SetFlags( fFlagsBefore ); CHECK( m_cpage.FScrubbed() ); CHECK( fFlagsBefore == m_cpage.FFlags() ); fFlagsAfter = CPAGE::fPageLeaf | CPAGE::fPageLongValue | CPAGE::fPageRoot; - m_cpage.SetFlags( fFlagsAfter ); - CHECK( !m_cpage.FScrubbed() ); - CHECK( fFlagsAfter == m_cpage.FFlags() ); + m_cpage.Dirty_( bfdfDirty ); m_cpage.SetDbtime( 2 ); CHECK( 2 == m_cpage.Dbtime() ); m_cpage.RevertDbtime( 1, fFlagsBefore ); CHECK( 1 == m_cpage.Dbtime() ); - CHECK( m_cpage.FScrubbed() ); - CHECK( fFlagsAfter == ( m_cpage.FFlags() & ~CPAGE::fPageScrubbed ) ); } // ================================================================ diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index e6741811..e8bf66e8 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -1343,12 +1343,12 @@ LOCAL VOID SHKIRootMoveRevertDbTime( ROOTMOVE* const prm ) { if ( prm->csrCatObj[iCat].Latch() == latchWrite ) { - prm->csrCatObj[iCat].RevertDbtime( prm->dbtimeBeforeCatObj[iCat], prm->fFlagsBeforeCatObj[iCat] ); + prm->csrCatObj[ iCat ].RevertDbtime( prm->dbtimeBeforeCatObj[ iCat ], prm->fFlagsBeforeCatObj[ iCat ] ); } if ( prm->csrCatClustIdx[iCat].Latch() == latchWrite ) { - prm->csrCatClustIdx[iCat].RevertDbtime( prm->dbtimeBeforeCatClustIdx[iCat], prm->fFlagsBeforeCatClustIdx[iCat] ); + prm->csrCatClustIdx[ iCat ].RevertDbtime( prm->dbtimeBeforeCatClustIdx[ iCat ], prm->fFlagsBeforeCatClustIdx[ iCat ] ); } } } diff --git a/dev/ese/src/ese/eselibwithtests/CMakeLists.txt b/dev/ese/src/ese/eselibwithtests/CMakeLists.txt index 98d4511c..3215e9e1 100644 --- a/dev/ese/src/ese/eselibwithtests/CMakeLists.txt +++ b/dev/ese/src/ese/eselibwithtests/CMakeLists.txt @@ -39,6 +39,7 @@ add_library(eselibwithtests SHARED # Tests daehelpers_test.cxx + ../bbtbuff_test.cxx 
../clogredomaps_test.cxx ../cpage_test.cxx ../checksum_test.cxx diff --git a/dev/ese/src/ese/node.cxx b/dev/ese/src/ese/node.cxx index 9ad48eea..019e4a22 100644 --- a/dev/ese/src/ese/node.cxx +++ b/dev/ese/src/ese/node.cxx @@ -2664,7 +2664,7 @@ INT INDIGetReservedTag( _In_ CPAGE& cpage, _In_ NodeResvTagId resvTagId, _Out_ D INT INDGetReservedTag( _In_ FUCB* pfucb, _In_ CSR* pcsr, _In_ NodeResvTagId resvTagId, _Out_ DATA* pdata ) // ================================================================ { - Assert( g_rgfmp[ pfucb->ifmp ].ErrDBFormatFeatureEnabled( JET_efvReservedTags ) ); + Assert( JET_errSuccess == g_rgfmp[ pfucb->ifmp ].ErrDBFormatFeatureEnabled( JET_efvReservedTags ) ); Assert( resvTagId <= rtidMax ); return INDIGetReservedTag( pcsr->Cpage(), resvTagId, pdata ); } @@ -2699,7 +2699,7 @@ INT INDAddReservedTag( _In_ FUCB* pfucb, _In_ CSR* pcsr, _In_ NodeResvTagId resv // Caller should log this operation before calling this API. { Assert( pcsr->FDirty() ); - Assert( g_rgfmp[ pfucb->ifmp ].ErrDBFormatFeatureEnabled( JET_efvReservedTags ) ); + Assert( pfucb == NULL || JET_errSuccess == g_rgfmp[ pfucb->ifmp ].ErrDBFormatFeatureEnabled( JET_efvReservedTags ) ); Assert( pcsr->Cpage().CbPageFree() > cb ); Assert( resvTagId <= rtidMax ); @@ -2732,7 +2732,7 @@ VOID NDReplaceReservedTag( _In_ FUCB* pfucb, _In_ CSR* pcsr, _In_ NodeResvTagId // Caller should log this operation before calling this API. { Assert( pcsr->FDirty() ); - Assert( g_rgfmp[ pfucb->ifmp ].ErrDBFormatFeatureEnabled( JET_efvReservedTags ) ); + Assert( JET_errSuccess == g_rgfmp[ pfucb->ifmp ].ErrDBFormatFeatureEnabled( JET_efvReservedTags ) ); Assert( resvTagId <= rtidMax ); NDIReplaceReservedTag( pcsr->Cpage(), resvTagId, data ); diff --git a/dev/ese/src/inc/bbtbuff.hxx b/dev/ese/src/inc/bbtbuff.hxx new file mode 100644 index 00000000..8b25d598 --- /dev/null +++ b/dev/ese/src/inc/bbtbuff.hxx @@ -0,0 +1,1293 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#pragma once + +/************************************************************************************************************ +A BBT buffer is a data structure used to store tree operations on the BBT at intermediate levels of the tree. +Each non-leaf (root & internal) page in a BBT has a BBT buffer. It has the following structure: +1. Each BBT buffer comprises multiple physically contiguous ESE pages. +2. The first page of the BBT buffer is called the BBT root (or BBT base page). A portion of it is allocated + to hold the BBT buffer using a reserved tag. The rest is available for use by classic BT nodes for storing + key separators. +3. The rest of the pages comprising the BBT buffer each contain 1 reserved tag that takes up all of the + available space on the page. +4. All of this space is considered one logical buffer indexed by an offset starting from the BBT root. +5. The BBT header is stored at offset 0 (i.e. at the beginning of the BBT root). +6. The rest of the space is used to store nodes in a SkipList structure. +7. Each node in the skiplist stores a BBTBuffOpcode, KEY & DATA for the operation. +8. The buffer is designed to be append-only. Each new operation on the tree results in adding a new node to + the buffer with the right opcode. E.g., a delete on a key will result in an insertion of bbtOpDelete with + the right key (and no data). + +The BBT buffer provides the following: +1. It is sorted, allowing avg log(n) seek/insert/delete operations. +2. It allows adding duplicate nodes, and provides a stable sort over the nodes. That is, the BBT buffer + guarantees that the insertion order of duplicate nodes is maintained by the sort. This means that + operations can be enumerated from oldest to latest. +3. Provides facilities for bulk merge and deletion of nodes. This is used to 'evict' operations from the + top-level to lower levels of the BBT, as buffers get full. + +Layering and Responsibilities: +1. Owns the physical format of the BBT buffer.
Uses CPAGE to manage per-page format. Comparable to NODE.
+2. Also owns runtime interface to the buffer:
+   - Read APIs and Write APIs (non-transacted).
+   - The companion class BBTBuffWriter provides transacted APIs for writing, allowing redo through a trx log.
+   - Manages latches. Built on top of CSR latch APIs.
+   - Also handles currency for the buffer. This is comparable to the CSR interface.
+
+BBTBuff Latching policy:
+1. Base page must always be kept latched (caller must own this latch).
+2. Base page latch state when the BBTBuff is loaded is considered 'ground state.'
+3. Any latches obtained during any BBTBuff operation are kept latched at 'ground state'
+   for the lifetime of BBTBuff or until Release() is called.
+4. For modify operations, latches will be upgraded to write for the duration of the operation,
+   then downgraded to ground state when the operation is complete.
+5. ErrUpgradeToWriteMode() means that the ground state is upgraded to latchWrite along with the base page latch,
+   so after that, any latches acquired will be kept at latchWrite state.
+6. Downgrade() can be called to bring the ground state back to read or RIW. Base page latch will be
+   downgraded accordingly.
+7. Latch order: Base page must be latched/upgraded first. The rest of the pages can be latched in any order.
+
+Above policy is used in three different ways:
+1. Read-only: Simplest form. All latches stay at latchRead.
+2. RIW: BBTBuff is loaded with base page at latchRIW. Any modify operation will temporarily
+   upgrade to latchWrite, and then downgrade the latches back to latchRIW. Theoretically, this
+   can also be done starting with latchRead, but latch conflicts could cause us to lose
+   all latches and fail (BBTBuff handles this case properly, and marks itself unusable).
+   However, we always start with RIW for a write operation.
+3. Write: ErrUpgradeToWriteMode() is used to keep BBTBuff in latchWrite state.
+ +************************************************************************************************************/ + +// Maximum levels supported by the skiplist +// 16 allows 64k nodes to be stored in the list with +// list operations achieving average case O(logn) performance. +constexpr int MAX_LEVELS = 16; + +// This is the reserved itag on every BBTBuff root page. Must be the second reserved itag. +constexpr int itagReservedBBTBuffRoot = 1; + +struct BBTBuffFormat +{ + int cbCPAGE; // size of the underlying physical page (CPAGE). + int cbBBTRoot; // size of the BBT root, amount of space allocated to the BBT buffer on the BBT base page. + int cpgInBBTBuff; // total number of physical pages that comprise a BBT superpage. +}; + +// UA_TODO: magic numbers, tune later +extern constexpr __declspec( selectany ) BBTBuffFormat BBTBUFF_FORMAT_CONSTANTS[] = +{ + { 4 * 1024, 1 * 1024, 32 }, // 128k BBTBuff + { 8 * 1024, 4 * 1024, 16 }, // 128k + { 16 * 1024, 8 * 1024, 16 }, // 256k + { 32 * 1024, 24 * 1024, 16 }, // 512k +}; + +/////////////////////////////////////////////////////////////////////////////// +// BEGIN persisted structures +#include + +PERSISTED +enum BBTBuffVersion : BYTE +{ + bbtvInvalid = 0, + bbtvInitial = 1, + + bbtvMax = 255 +}; + +PERSISTED +enum BBTBuffOpcode : BYTE +{ + bbtOpNone = 0, + bbtOpInsert = 1, + bbtOpUpdate = 2, + bbtOpDelete = 3, + bbtOpEscrowUpdate = 4, + bbtOpUpsert = 5, + + bbtOpMax = 15, +}; + +// SkipListNode structure: +// | Hdr | m_cbKey+Flags | m_cbData | rgLinks[ m_iLevel + 2 ] | Key | Data | +// Bytes: 0 2 DVC-4 6 11->51 IbKey()+m_CbKey IbData()+m_cbData +// D = fDeleted +// V = fVersioned +// C = Compressed +// - = Reserved +// iLevel+2 because level0 node stores prev0 and next0 link + +// Stored on the high bits of SkipListNode::m_cbKey +PERSISTED +enum SkipListNodeFlags : USHORT +{ + fSLNNone = 0, + fSLNDeleted = 0x01, + fSLNVersioned = 0x02, + fSLNCompressed = 0x04, +}; + +inline SkipListNodeFlags operator|( SkipListNodeFlags 
a, SkipListNodeFlags b ) +{ + return static_cast( static_cast( a ) | static_cast( b ) ); +} + +inline SkipListNodeFlags operator&( SkipListNodeFlags a, SkipListNodeFlags b ) +{ + return static_cast( static_cast( a ) & static_cast( b ) ); +} + +inline SkipListNodeFlags operator~( SkipListNodeFlags a ) +{ + return static_cast( ~static_cast( a ) ); +} + +PERSISTED +class SkipListLink +{ + friend class SkipListLinkArray; + static const int ALIGNMENT = 4; // must be power-of-2 + int32_t ib; + +public: + SkipListLink() = default; + SkipListLink( const SkipListLink& rhs ) = default; + SkipListLink( nullptr_t unused ) { ib = 0; } // nullptr_t used to only allow nullptr or 0 as an argument, + // other ints or ptr types aren't convertible to nullptr_t + bool FNull() const { return ib == 0; } + void Nullify() { ib = 0; } + int ToInt() const { return ib; } + + bool operator==( const SkipListLink& rhs ) const { return ( ib == rhs.ib ); } + bool operator!=( const SkipListLink& rhs ) const { return (ib != rhs.ib ); } + SkipListLink& operator=( const SkipListLink& rhs ) = default; + + SkipListLink& Inc( int _ib ) + { + Assert( FRounded( _ib ) ); + ib += _ib; + return *this; + } + + SkipListLink& Dec( int _ib ) + { + Assert( FRounded( _ib ) ); + ib -= _ib; + return *this; + } + +public: + static SkipListLink FromInt( int _ib ) + { + Assert( FRounded( _ib ) ); + SkipListLink link; + link.ib = _ib; + return link; + } + + static_assert( FPowerOf2( ALIGNMENT ), "The following functions require ALIGNMENT to be a power-of-2" ); + + static SkipListLink Null() { return SkipListLink( 0 ); } + static int Roundup( int _ib ) { return ( _ib + ALIGNMENT - 1 ) & -ALIGNMENT; } + static bool FRounded( int _ib ) { return !( _ib & ( ALIGNMENT - 1 ) ); } +}; + +#include + +// SkipListLinkArray structure: +// | Preamble | rgLinks[ cLinks ] | +// Bytes: 0 1->x x->x+c*2 +// Preamable = A sequence of bytes with each 4-bits in it storing the lower order 4 bits of the link. 
+// So each link is 16-bits in rgLinks[i] + 4-bits in preamble = 20-bits. +// The 20-bits are further left-shifted to yield a 22-bit link (that can address 4-byte aligned upto 4mb). + +// Manipulates persisted representation of skiplist links of a given node. +// The class layout doesn't define the persisted layout of the array. +// Instead, its implementation defines the persisted layout. +PERSISTED +class SkipListLinkArray +{ +public: + static const int REMOTE_BITS = 4; // bits per link stored in the preamble bytes + static const int LOCAL_BITS = 16; // bits stored with each link + static const int REMOTE_BITMASK = ( 1 << REMOTE_BITS ) - 1; + static const int LINK_MAX = ( 1 << ( LOCAL_BITS + REMOTE_BITS ) ) * SkipListLink::ALIGNMENT - 1; // 4mb + +private: + BYTE* m_rgb = nullptr; + int m_cLinks = 0; + int m_iOffset = 0; + + SkipListLink GetLink_( int i ) const + { + Assert( i < m_cLinks ); + const UnalignedLittleEndianPtr prgLinks = (USHORT*) &m_rgb[ CbPreamble( m_cLinks ) ]; + int ilink = prgLinks[ i ] << REMOTE_BITS; + int remoteBits = m_rgb[ ( i * REMOTE_BITS ) / 8 ]; + remoteBits = ( remoteBits >> ( ( i * REMOTE_BITS ) % 8 ) ) & REMOTE_BITMASK; + ilink |= remoteBits; + return SkipListLink::FromInt( ilink * SkipListLink::ALIGNMENT ); + } + + void SetLink_( int i, SkipListLink link ) + { + UnalignedLittleEndianPtr prgLinks = (USHORT*) &m_rgb[ CbPreamble( m_cLinks ) ]; + + Assert( i < m_cLinks ); + Assert( link.ib <= LINK_MAX ); + int ilink = link.ib / SkipListLink::ALIGNMENT; + int remoteBits = ilink & REMOTE_BITMASK; + prgLinks[ i ] = (USHORT)( ilink >> REMOTE_BITS ); + remoteBits <<= ( i * REMOTE_BITS ) % 8; + int remoteMask = ~( REMOTE_BITMASK << ( ( i * REMOTE_BITS ) % 8 ) ); + m_rgb[ ( i * REMOTE_BITS ) / 8 ] &= remoteMask; // zero out old bits + m_rgb[ ( i * REMOTE_BITS ) / 8 ] |= remoteBits; // write new bits + + Assert( GetLink_( i ) == link ); + } + +public: + SkipListLinkArray() = default; + SkipListLinkArray( const SkipListLinkArray& ) = default; + 
SkipListLinkArray( BYTE* rgb, int cLinks, int iOffset ) + : m_rgb( rgb ), m_cLinks( cLinks ), m_iOffset( iOffset ) {}; + + SkipListLinkArray& operator=( const SkipListLinkArray& ) = default; + + SkipListLink operator[]( int i ) const { return GetLink_( m_iOffset + i ); } + void SetLink( int i, SkipListLink link ) { SetLink_( m_iOffset + i, link ); } + static constexpr int CbPreamble( int cLinks ) { return ( cLinks * REMOTE_BITS + 7 ) / 8; } + static constexpr int Cb( int cLinks ) { return CbPreamble( cLinks ) + cLinks * ( LOCAL_BITS / 8 ); } +}; + +#include + +PERSISTED +struct SkipListNodeHdr +{ + static_assert( FHostIsLittleEndian(), "The union below needs to be fixed for big-endian architectures." ); + union + { + struct + { + BYTE fDuplicate : 1; // indicates that the next node (at level 0) is a duplicate of this one. + BYTE reserved : 7; // reserved for future expansion + BYTE iLevel : 4; // skiplist level of the node + BBTBuffOpcode opcode : 4; + }; + UnalignedLittleEndian le_usHeader; + }; + + SkipListNodeHdr() = default; + SkipListNodeHdr( const SkipListNodeHdr& rhs ) + { le_usHeader = rhs.le_usHeader; } + bool IsValid() const { return ( reserved == 0 ); } + + const SkipListNodeHdr& operator=( const SkipListNodeHdr& rhs ) + { + le_usHeader = rhs.le_usHeader; + return *this; + } +}; + +PERSISTED +class SkipListNode +{ + friend class SkipListAsserts; + friend class BBTBuff; + template friend class BBTBuffWriter; + + static const USHORT CBKEY_MASK = 0x0fff; + static const USHORT CBKEY_FLAGS_SHIFT = 12; + + static constexpr USHORT LShiftFlag( SkipListNodeFlags flags ) { return static_cast( flags ) << CBKEY_FLAGS_SHIFT; } + + SkipListNodeHdr m_header; + UnalignedLittleEndian m_cbKey; + UnalignedLittleEndian m_cbData; + BYTE m_rgbNode[ 0 ]; // Variable sized strucures as shown above in the SkipListNode structure comment + + SkipListNode( int level, BBTBuffOpcode opcode, int cbKey, int cbData ) + { + Assert( level >= 0 && level < MAX_LEVELS ); + Assert( cbKey <= 
CBKEY_MASK ); + Assert( cbData <= USHRT_MAX ); + + m_header.le_usHeader = 0; + m_header.iLevel = level; + m_header.opcode = opcode; + + m_cbKey = (USHORT) cbKey; + m_cbData = (USHORT) cbData; + memset( m_rgbNode, 0, CbLinks( level ) ); + } + + int IbLinks() const { return offsetof( SkipListNode, m_rgbNode ); } + int IbKey() const { return IbLinks() + CbLinks( m_header.iLevel ); } + int IbData() const { return IbKey() + CbKey(); } + + BYTE* PbLinks() const { return ( ( (BYTE*) this ) + IbLinks() ); } + BYTE* PbKey() const { return ( ( (BYTE*) this ) + IbKey() ); } + BYTE* PbData() const { return ( ( (BYTE*) this ) + IbData() ); } + + void SetDeleted( bool fDeleted ) { m_cbKey = ( fDeleted ? m_cbKey | LShiftFlag( fSLNDeleted ) : m_cbKey & ( ~LShiftFlag( fSLNDeleted ) ) ); } + void SetVersioned( bool fVersioned ) { m_cbKey = ( fVersioned ? m_cbKey | LShiftFlag( fSLNVersioned ) : m_cbKey & ( ~LShiftFlag( fSLNVersioned ) ) ); } + void SetCompressed( bool fCompressed ) { m_cbKey = ( fCompressed ? 
m_cbKey | LShiftFlag( fSLNCompressed ) : m_cbKey & ( ~LShiftFlag( fSLNCompressed ) ) ); } + +public: + // Node Basics + + bool IsValid() const { return ( m_header.IsValid() && CbKey() > 0 && CbKey() <= cbKeyMostMost ); } + SkipListNodeHdr Header() const { return m_header; } + + + // Skiplist Navigation + + int Level() const { return m_header.iLevel; } + SkipListLink LinkPrev0() const { return SkipListLinkArray{ PbLinks(), Level() + 2, 0 }[ 0 ]; } + SkipListLink LinkNext0() const { return SkipListLinkArray{ PbLinks(), Level() + 2, 0 }[ 1 ]; } + SkipListLinkArray RgLinksNext() const { return SkipListLinkArray{ PbLinks(), Level() + 2, 1 }; } // returns array of next links (LinkPrev0 isn't part of this array) + bool FDuplicateNext0() const { return m_header.fDuplicate; } + void SetLinkPrev0( SkipListLink link ) { SkipListLinkArray{ PbLinks(), Level() + 2, 0 }.SetLink( 0, link ); } + void SetDuplicateNext0( bool fDup ) { m_header.fDuplicate = fDup; } + + + // Node key/data/flags manipulation + + BBTBuffOpcode Opcode() const { return m_header.opcode; } + int CbKey() const { return ( m_cbKey & CBKEY_MASK ); } + int CbData() const { return m_cbData; } + int Cb() const { return sizeof( SkipListNode ) + CbLinks( m_header.iLevel ) + CbKey() + CbData(); } + bool FDeleted() const { return !!( m_cbKey & LShiftFlag( fSLNDeleted ) ); } + bool FVersioned() const { return !!( m_cbKey & LShiftFlag( fSLNVersioned ) ); } + bool FCompressed() const { return !!( m_cbKey & LShiftFlag( fSLNCompressed ) ); } + SkipListNodeFlags FFlags() const { return static_cast( m_cbKey >> CBKEY_FLAGS_SHIFT ); } + KEY Key() const; + DATA Data() const; + + void CopyKeyDataIntoBuffer( void* pvBuffer, int cbMost ) const; + void SetOpcode( BBTBuffOpcode opcode ); + void SetNodeKey( const KEY& key ); + void SetNodeData( const DATA& data ); + void SetNodeFlags( SkipListNodeFlags flags ); + int CmpKey( const KEY& keyRhs ) const; + + + // Creation/Sizing helpers + + static constexpr int CbLinks( int level ) + { + 
// Level 0 nodes contain 2 links: linkPrev0, linkNext0. + return SkipListLinkArray::Cb( level + 2 ); + } + + static constexpr int Cb( int level, int cbKey, int cbData ) + { + return sizeof( SkipListNode ) + CbLinks( level ) + cbKey + cbData; + } + + static SkipListNode* Create( void* pv, int level, BBTBuffOpcode opcode, int cbKey, int cbData ) + { + return new ( pv ) SkipListNode( level, opcode, cbKey, cbData ); + } +}; + +class SkipListAsserts +{ + static_assert( std::is_standard_layout::value, "SkipListLink should be a standard layout type." ); + static_assert( std::is_standard_layout::value, "SkipListNodeHdr should be a standard layout type." ); + static_assert( std::is_standard_layout::value, "SkipListNode should be a standard layout type." ); + static_assert( sizeof( SkipListLink ) == 4, "SkipListLink must be 4 bytes." ); + static_assert( SkipListNode::CBKEY_MASK >= cbKeyMostMost, "m_cbKey is too small for cbKeyMostMost." ); + static_assert( offsetof( SkipListNode, m_cbKey ) == 2, "SkipListNode header must be 2 bytes." ); + static_assert( SkipListNode::Cb( 0, 0, 0 ) == 11, "Minimum SkipListNode overhead must be 11 bytes." ); +}; + +PERSISTED +struct BBTBuffHeader +{ + BYTE nVersion; // bbtbuff format version + BYTE cMaxPages; // number of physical pages that make up the bbt buff + UnalignedLittleEndian le_ibMicFree; // offset where the next node is to be inserted + UnalignedLittleEndian le_cbFree; // total number of free bytes in the buff (can be different from ibMicFree because of deletes) + UnalignedLittleEndian le_cNodes; // total number of nodes in the bbt buff + BYTE rgbLinks[ SkipListLinkArray::Cb( MAX_LEVELS ) ]; // links to the first node in the bbt buff + BYTE reserved[ 2 ]; // align to 4-bytes +}; + +// Changing header size is a format breaking change! +// Make sure to handle format compatibility requirements. +static_assert( sizeof( BBTBuffHeader ) == 56, "BBTBuffHeader size changed." 
); + +INLINE SkipListLinkArray RgSkipListLinksHead( BBTBuffHeader* pHeader ) +{ + return SkipListLinkArray{ pHeader->rgbLinks, MAX_LEVELS, 0 }; +} + + +#include +// END persisted structures +/////////////////////////////////////////////////////////////////////////////// + +struct PageOffsetTuple +{ + USHORT ipg; + USHORT ibOnPage; +}; + +struct BBTBuffChangeContext +{ + DBTIME dbtimeBefore; + DBTIME dbtimeCurr; + SkipListLink rgLinksPrev[ MAX_LEVELS ]; + INT level; + SkipListLink linkCurr; +}; + +class BBTBuff +{ + // Companion classes for transacted writing. + template friend class BBTBuffWriter; + template friend class IBBTBuffTrxLog; + friend class BBTBuffRedo; + friend class DbTimeGuard; + + // Debug extensions that need to inspect private state. + friend std::string DumpBBTBuff( const BBTBuff& bbtBuff, INT level, INT ib = 0 ); + friend LOCAL ERR ErrEDBGDumpBBTBuff_( BBTBuff* pBBTBuffDebuggee, INT level ); + +private: + BBTBuffHeader* m_pHeader = NULL; + CSR* m_pcsrBase = NULL; // Base page must stay latched for the lifetime of BBTBuff + CSR* m_rgcsrLatched = NULL; + SkipListNode* m_pnodeCurr = NULL; + INT m_ipgCurr = -1; + INT m_cbPageDataMax = 0; + BYTE m_cMaxPages = 0; // m_rgcsrLatched is sized based on this + LATCH m_latchType = latchNone; + BYTE m_ifmt = 0xff; + + INT m_iLevelSeedTestOnly= -1; // only used in unit testing + + // Needed for latching pages + PIB* m_ppib = NULL; + IFMP m_ifmp = ifmpNil; + +public: + BBTBuff() = default; + ~BBTBuff(); + + // Initialization + + void Load( _In_ PIB* ppib, IFMP ifmp, _In_ CSR* pcsr, _In_ CSRHeapArray& rgcsrBBT, _In_ BBTBuffHeader* pbbtbHeader, LATCH latchType ); + void Unload(); + bool FLoaded() const; + + + // Page latch manipulation + + ERR ErrLatchAll(); + ERR ErrWriteLatchAll(); + void Downgrade( LATCH latchType ); + LATCH LatchType() const { return m_latchType; } + + + // BBT buffer navigation (seeks and moves) + + enum SeekFlags : BYTE + { + sfSkipDuplicates = 0, + sfAllowDuplicates = 1 + }; + + ERR 
ErrSeekLEQ( const KEY& key ); + ERR ErrSeekGEQOldest( const KEY& key ); + ERR ErrMoveFirst( SeekFlags fFlags ); + ERR ErrMoveLast(); + ERR ErrMoveNext( SeekFlags fFlags ); + ERR ErrMovePrev( SeekFlags fFlags ); + ERR ErrMoveToLatestInDuplicateSequence(); + + // Gets the ith previous duplicate node of the current (without moving the current). + // piDup (in/out): ith duplicate node to get. 0 = current node. + // Returns the index of the returned node in the duplicate chain + // e.g. can return a smaller piDup, if the BBTBuff didn't have enough duplicate versions. + ERR ErrGetDuplicate( _Out_ SkipListNode** ppnodeDup, _Inout_ int* piDup ); + + + // Cursor manipulation + + void ResetCurr(); + SkipListNode* PnodeCurr() { return m_pnodeCurr; } + const SkipListNode* PnodeCurr() const { return m_pnodeCurr; } + SkipListLink GetLinkToCurrNode() const; + ERR ErrSetCurrNodeFromLink( SkipListLink link ); + + + // Invariant Format + + int CbMaxNodeSize() const { return m_cbPageDataMax; }; + int Cpg() const { Assert( FLoaded() ); return m_cMaxPages; } + static int IBBTBuffFormatForPage( int cbPage ); + static const BBTBuffFormat* PBBTBuffFormatForPage( int cbPage ); + static int CbMax(); // returns the total size of the BBT buffer + + + // Buffer space + + int CNodes() const { return m_pHeader->le_cNodes; } + int CbFree() const { return m_pHeader->le_cbFree; } + int IbFree() const { return m_pHeader->le_ibMicFree->ToInt(); } + + + // Misc + + bool FCanInsert( const KEY& key, const DATA& data, _Out_ int* pcbReq ); + SkipListNode* PnodeFromLink( SkipListLink link ) const; + + + // These operations do unlogged modifications to the BBTBuff. + // They must be logged externally. 
+ + template + void MergeAndDelNodes( + TMergeIt itNodesMergeBegin, + const TMergeIt itNodesMergeEnd, + TDelIt itNodesDelBegin, + const TDelIt itNodesDelEnd, + SkipListLink linkIbMergeStart, + _Out_ int* pcNodesMerged, + _Out_ int* pcNodesDel ); + + + // Validation + + bool FIsHeaderValid() const; + ERR ErrAssertIsLatestInDuplicateSequence( const SkipListNode* pnode ); + ERR ErrAssertIsOldestInDuplicateSequence( const SkipListNode* pnode ); + ERR ErrValidate(); + + + // BBTBuff page initialization functions + +public: + static void InitBBTBuffRoot( FUCB* pfucb, CSR* pcsr ); + static void InitBBTBuffNonRoot( CSR* pcsr ); + static void GetBBTBuffRoot( const CSR& csr, LINE* pline ); + static BBTBuffHeader* PBBTHeader( const LINE& line ); +private: + static void InitBBTBuffHeader( _Out_ BBTBuffHeader* pbbtbHeader, _In_ int cMaxPages ); + + +private: + // Format primitives + + const BBTBuffFormat* PFormat() const; + BYTE* PbPage( int ipg ) const { return (BYTE*) Pcsr( ipg )->Cpage().PvBuffer(); } + int IbHeader() const; + int IbPageDataBegin( int ipg ) const { return ( ipg > 0 ? m_pcsrBase->Cpage().CbPageHeader() : IbHeader() + sizeof( BBTBuffHeader ) ); } + int IbPageDataEnd( int ipg ) const { return ( ipg > 0 ? g_cbPage - CPAGE::CbTagReservedLegacy() : IbHeader() + PFormat()->cbBBTRoot ); } + + + // Latch manipulation + + CSR* Pcsr( int ipg ) const { return ( ipg > 0 ? 
&m_rgcsrLatched[ ipg - 1 ] : m_pcsrBase ); } + ERR ErrEnsurePageLatched( SkipListLink link, LATCH latchType ); + ERR ErrEnsurePageLatched( int ipgOffset, LATCH latchType ); + void DowngradeLatches(); + void AssertLatchedAll( LATCH latchType ); + void AssertReadyForWrite(); + + + // SkiplinstLink Translation + + PageOffsetTuple IpgOffsetFromLink( SkipListLink link ) const; + SkipListLink LinkFromIpgOffset( int ipg, int ibOnPage ) const; + SkipListNode* PnodeFromIpgOffset( PageOffsetTuple pgOffset ) const; + ERR ErrPnodeFromLink_AcqLatch( SkipListLink link, SkipListNode** ppnode ); + + + // Misc + + void ChangeCurr( int ipg, SkipListNode* pnodeCurr ); + ERR ErrGetLatestInDuplicateSequence( SkipListNode* pnodeCurr, _Out_ SkipListLink* plinkLatestDup ); + static int GenLevel( int iSeed ); + int GenLevel(); + + + // Internal implementation of certain operations + +#ifdef ENABLE_JET_UNIT_TEST + // ErrSeek_() needs unit testing directly +public: +#endif + enum class SeekMode { LT, LEQ }; + enum class SeekPos { Prev, Curr }; + ERR ErrSeek_( + const KEY& key, + SeekMode seekMode, + SeekPos seekPos, + _Out_ int* piResult, + _Out_ SkipListLink rgLinks[ MAX_LEVELS ], + _Out_ SkipListNode* rgNodes[ MAX_LEVELS ] ); + +private: + SkipListNode* PnodeInsert_( + const BBTBuffChangeContext& changeCtx, + SkipListNode* rgNodes[ MAX_LEVELS ], + BBTBuffOpcode opcode, + const KEY& key, + const DATA& data, + SkipListNodeFlags flags, + bool fDuplicate ); + + void Delete_( + BBTBuffChangeContext changeCtx, + SkipListNode* rgNodes[ MAX_LEVELS ], + const SkipListNode* pnodeToDelete ); + + void RangeDelete_( SkipListLink linkFirst, int cNodes ); + + template + void CopyMergeAndDelNodes_( + BYTE** rgpbPage, + TMergeIt itNodesMergeBegin, + const TMergeIt itNodesMergeEnd, + TDelIt itNodesDelBegin, + const TDelIt itNodesDelEnd, + SkipListLink linkIbMergeStart, + _Out_ int* pcNodesMerged, + _Out_ int* pcNodesDel ); + + // Unit test hooks +#ifdef ENABLE_JET_UNIT_TEST +public: + void SetLevelSeed( int 
iSeed ) { m_iLevelSeedTestOnly = iSeed; } +#endif +}; + +enum CursorLocation : BYTE +{ + clocInvalid = 0, + clocFailed, + clocOnTarget, + clocOnTargetOlderVersion, // on an older version of the target node + clocBeforeTarget, + clocAfterTarget, + clocBeforeFirst, // before first node on the page (not on the tree) + clocAfterLast, // after last node on the page (not on the tree) +}; + +INLINE bool FCLocOnNode( CursorLocation cloc ) +{ + return ( cloc == clocOnTarget || cloc == clocBeforeTarget || cloc == clocAfterTarget ); +} + +INLINE bool FCLocValid( CursorLocation cloc ) +{ + return ( cloc != clocInvalid && cloc != clocFailed ); +} + +// BBTBuff Currency Stack Register and associated state. +struct BBTBuffCSR +{ + CSR csr; + CSRHeapArray rgcsrBBT; + BBTBuff bbtbuff; + SkipListLink link; + CursorLocation cursorLoc; + + BBTBuffCSR() : cursorLoc( clocInvalid ) + { + link.Nullify(); + } + + // Release all latches except base page latch. + // Caller is responsible for managing base page latch. + void Release( bool fTossImmediate = false ) + { + // Releasing latches, bbtbuff is invalid. + // But currency is still valid, i.e. link and cursorLoc are still valid. + bbtbuff.Unload(); + + if ( rgcsrBBT != NULL ) + { + for ( int i = 0; i < rgcsrBBT.CItems(); i++ ) + { + // All operations on BBTBuff pages must be kept at the same dbtime. 
+ Assert( !rgcsrBBT[ i ].FLatched() || csr.Dbtime() == rgcsrBBT[ i ].Dbtime() ); + rgcsrBBT[ i ].ReleasePage( fTossImmediate ); + } + } + + csr.ReleasePage( fTossImmediate ); + } + + void Reset() + { + Assert( !bbtbuff.FLoaded() ); + cursorLoc = clocInvalid; + link.Nullify(); + rgcsrBBT.ForEach( []( CSR& csr ) { csr.Reset(); } ); + csr.Reset(); + } + + bool FValid() + { + // UA_TODO: fix validity criteria + return true; + } +}; + + +/////////////////////////////////////////////////////////////////////////////// +// SkipListNode INLINE methods + +INLINE KEY SkipListNode::Key() const +{ + Assert( IsValid() ); + KEY key; + key.prefix.Nullify(); + key.suffix.SetPv( PbKey() ); + key.suffix.SetCb( CbKey() ); + return key; +} + +INLINE DATA SkipListNode::Data() const +{ + Assert( IsValid() ); + DATA data; + data.SetPv( PbData() ); + data.SetCb( CbData() ); + return data; +} + +INLINE void SkipListNode::CopyKeyDataIntoBuffer( void* pvBuffer, int cbMost ) const +{ + Assert( PbData() == PbKey() + CbKey() ); // data must immediately follow the key + INT cbCopy = min( cbMost, CbKey() + CbData() ); + UtilMemCpy( pvBuffer, PbKey(), cbCopy ); +} + +INLINE void SkipListNode::SetOpcode( BBTBuffOpcode opcode ) +{ + m_header.opcode = opcode; +} + +INLINE void SkipListNode::SetNodeKey( const KEY& key ) +{ + Assert( key.Cb() == CbKey() ); + key.CopyIntoBuffer( PbKey(), CbKey() ); +} + +INLINE void SkipListNode::SetNodeData( const DATA& data ) +{ + Assert( data.Cb() == CbData() ); + UtilMemCpy( PbData(), data.Pv(), CbData() ); +} + +INLINE void SkipListNode::SetNodeFlags( SkipListNodeFlags flags ) +{ + USHORT usFlags = static_cast( flags ) << CBKEY_FLAGS_SHIFT; + Assert( 0 == ( usFlags & CBKEY_MASK ) ); + m_cbKey |= usFlags; +} + +INLINE int SkipListNode::CmpKey( const KEY& keyRhs ) const +{ + KEY keyLhs; + keyLhs.prefix.Nullify(); + keyLhs.suffix.SetPv( PbKey() ); + keyLhs.suffix.SetCb( CbKey() ); + return ::CmpKey( keyLhs, keyRhs ); +} + + 
+/////////////////////////////////////////////////////////////////////////////// +// BBTBuff INLINE methods + +INLINE const BBTBuffFormat* BBTBuff::PFormat() const +{ + Assert( m_ifmt < _countof( BBTBUFF_FORMAT_CONSTANTS ) ); + return &BBTBUFF_FORMAT_CONSTANTS[ m_ifmt ]; +} + +INLINE int BBTBuff::IbHeader() const +{ + auto ib = ( (BYTE*) m_pHeader ) - PbPage( 0 ); + Assert( ib < g_cbPage ); + return (int) ib; +} + +INLINE PageOffsetTuple BBTBuff::IpgOffsetFromLink( SkipListLink link ) const +{ + Assert( !link.FNull() ); + PageOffsetTuple tuple; + int ib = link.ToInt(); + ib -= PFormat()->cbBBTRoot; + if ( ib >= 0 ) + { + tuple.ipg = (USHORT) ( 1 + ib / m_cbPageDataMax ); + tuple.ibOnPage = (USHORT) ( m_pcsrBase->Cpage().CbPageHeader() + ( ib % m_cbPageDataMax ) ); // SkipListLink::ib = 0 points to BBTBuffHeader + } + else + { + tuple.ipg = 0; + tuple.ibOnPage = (USHORT) ( link.ToInt() + IbHeader() ); + } + + Assert( tuple.ibOnPage != 0 ); + return tuple; +} + +INLINE SkipListLink BBTBuff::LinkFromIpgOffset( int ipg, int ibOnPage ) const +{ + Assert( ipg < Cpg() ); + Assert( ibOnPage != 0 ); + + int ib; + if ( ipg > 0 ) + { + ib = PFormat()->cbBBTRoot; + ib += ( ipg - 1 ) * m_cbPageDataMax; + ib += ( ibOnPage - m_pcsrBase->Cpage().CbPageHeader() ); // SkipListLink::ib = 0 points to BTBuffHeader + } + else + { + ib = ibOnPage - IbHeader(); + } + + Assert( !( ib & 0x03 ) ); + return SkipListLink::FromInt( ib ); +} + +INLINE SkipListNode* BBTBuff::PnodeFromIpgOffset( PageOffsetTuple pgOffset ) const +{ + Assert( Pcsr( pgOffset.ipg )->FLatched() ); + auto pnode = (SkipListNode*) ( PbPage( pgOffset.ipg ) + pgOffset.ibOnPage ); + Assert( pnode->IsValid() ); + return pnode; +} + +INLINE SkipListNode* BBTBuff::PnodeFromLink( SkipListLink link ) const +{ + if ( link.FNull() ) + { + return NULL; + } + + auto pgOffset = IpgOffsetFromLink( link ); + Assert( Pcsr( pgOffset.ipg )->FLatched() ); + return PnodeFromIpgOffset( pgOffset ); +} + +INLINE ERR 
BBTBuff::ErrPnodeFromLink_AcqLatch( SkipListLink link, SkipListNode** ppnode ) +{ + ERR err = JET_errSuccess; + + if ( link.FNull() ) + { + *ppnode = NULL; + return err; + } + + auto pgOffset = IpgOffsetFromLink( link ); + if ( !Pcsr( pgOffset.ipg )->FLatched() ) + { + CallR( ErrEnsurePageLatched( pgOffset.ipg, m_latchType ) ); + } + + *ppnode = PnodeFromIpgOffset( pgOffset ); + return err; +} + +INLINE SkipListLink BBTBuff::GetLinkToCurrNode() const +{ + if ( m_pnodeCurr == NULL ) + { + return SkipListLink::Null(); + } + + BYTE* pbPage = PbPage( m_ipgCurr ); + Assert( m_pnodeCurr->IsValid() ); + Assert( m_ipgCurr >= 0 ); + Assert( Pcsr( m_ipgCurr )->FLatched() && pbPage != NULL ); + + BYTE* pb = (BYTE*) m_pnodeCurr; + EnforceSz( pb >= pbPage, "BBTBuff corruption !" ); + EnforceSz( pb < ( pbPage + g_cbPage ), "BBTBuff corruption !" ); + EnforceSz( pb + m_pnodeCurr->Cb() < ( pbPage + g_cbPage ), "BBTBuff corruption !" ); + + auto ibOnPage = pb - pbPage; + Assert( ibOnPage + m_pnodeCurr->Cb() <= IbPageDataEnd( m_ipgCurr ) ); + SkipListLink link = LinkFromIpgOffset( m_ipgCurr, (int) ibOnPage ); + Assert( link.ToInt() < m_pHeader->le_ibMicFree->ToInt() ); + return link; +} + +INLINE void BBTBuff::ResetCurr() +{ + Assert( ( m_ipgCurr < 0 ) == ( m_pnodeCurr == NULL ) ); + m_ipgCurr = -1; + m_pnodeCurr = NULL; +} + +INLINE void BBTBuff::ChangeCurr( int ipg, SkipListNode* pnodeCurr ) +{ + Assert( ipg >= 0 && ipg < Cpg() ); + Assert( Pcsr( ipg )->FLatched() ); + + BYTE* pb = (BYTE*) pnodeCurr; + BYTE* pbPage = PbPage( ipg ); + Assert( pb >= pbPage ); + Assert( pb < ( pbPage + g_cbPage ) ); + Assert( pb + pnodeCurr->Cb() < ( pbPage + g_cbPage ) ); + + m_ipgCurr = ipg; + m_pnodeCurr = pnodeCurr; +} + +INLINE int BBTBuff::GenLevel( int iSeed ) +{ + int level = 0; + const int SEED_MAX = 1 << ( MAX_LEVELS - 1 );// a max value of 0x7fff will generate a level of 15 + for ( int r = iSeed % SEED_MAX; ( r & 1 ); r >>= 1 ) + { + level++; + } + + return level; +} + +INLINE int 
BBTBuff::GenLevel()
+{
+    // Picks the skiplist level for a new node. Unit-test builds can pin the seed through
+    // m_iLevelSeedTestOnly (a negative value means "not pinned"); otherwise rand() supplies it.
+#ifdef ENABLE_JET_UNIT_TEST
+    return GenLevel( m_iLevelSeedTestOnly >= 0 ? m_iLevelSeedTestOnly : rand() );
+#else
+    return GenLevel( rand() );
+#endif
+}
+
+// Maps a physical page size to its index in BBTBUFF_FORMAT_CONSTANTS.
+// The table starts at the 4k (2^12) entry and has one entry per power-of-2 page size,
+// so the index is log2( cbPage ) - 12.
+INLINE int BBTBuff::IBBTBuffFormatForPage( int cbPage )
+{
+    Assert( FPowerOf2( cbPage ) );
+    const unsigned long ulFmt = Log2OfPowerOf2( (unsigned long) cbPage ) - 12; // 2^12 = 4k, first entry in the format constants table
+
+    // Range-check before touching the table: a page size below 4k turns the subtraction into a
+    // huge unsigned value, and one above the last entry runs off the end of the table.
+    Assert( ulFmt < _countof( BBTBUFF_FORMAT_CONSTANTS ) );
+    Assert( BBTBUFF_FORMAT_CONSTANTS[ ulFmt ].cbCPAGE == cbPage );
+    return (int) ulFmt;
+}
+
+// Returns the format constants entry for the given physical page size.
+INLINE const BBTBuffFormat* BBTBuff::PBBTBuffFormatForPage( int cbPage )
+{
+    return &BBTBUFF_FORMAT_CONSTANTS[ IBBTBuffFormatForPage( cbPage ) ];
+}
+
+// Returns the total size of the BBT buffer for the current page size (g_cbPage):
+// the root-page region plus the usable data space of every non-root page.
+INLINE int BBTBuff::CbMax()
+{
+    // BBTBuff doesn't insert tags/ilines. It uses the page data space as one big buffer.
+    // But to keep CPAGE checks happy, we can't reclaim the reserved tag.
+    const BBTBuffFormat* pFormat = PBBTBuffFormatForPage( g_cbPage );
+    return pFormat->cbBBTRoot + ( pFormat->cpgInBBTBuff - 1 ) * ( CPAGE::CbPageDataMaxNoInsert( g_cbPage ) );
+}
+
+// Performs an evict operation, deleting required local nodes, while merging external nodes.
+// This operation must be externally logged.
+//
+// The new buffer image is built by CopyMergeAndDelNodes_() in temporary per-page allocations
+// and is then copied back over the BBTBuff pages.
+//   itNodesMergeBegin/End - nodes to merge in (not all of them may fit)
+//   itNodesDelBegin/End   - links (SkipListLink) of the local nodes to delete
+//   linkIbMergeStart      - offset in the BBTBuff where the merged nodes are placed
+//   pcNodesMerged         - receives the count of nodes merged (can be less than the input)
+//   pcNodesDel            - receives the count of nodes deleted
+template <typename TMergeIt, typename TDelIt>
+void BBTBuff::MergeAndDelNodes(
+    TMergeIt itNodesMergeBegin,
+    const TMergeIt itNodesMergeEnd,
+    TDelIt itNodesDelBegin,
+    const TDelIt itNodesDelEnd,
+    SkipListLink linkIbMergeStart,
+    _Out_ int* pcNodesMerged,
+    _Out_ int* pcNodesDel )
+{
+    AssertReadyForWrite(); // must already be dirtied
+
+    // One temporary page-sized buffer per page of the BBTBuff; the pointer array itself lives on the stack.
+    BYTE** rgpbPage = (BYTE**) _alloca( sizeof( BYTE* ) * Cpg() );
+    Assert( rgpbPage );
+
+    for ( int i = 0; i < Cpg(); i++ )
+    {
+        BFAlloc( bfasTemporary, (void**) &rgpbPage[ i ], m_pcsrBase->Cpage().CbPage() ); // can't fail
+        Assert( rgpbPage[ i ] );
+    }
+
+    CopyMergeAndDelNodes_(
+        rgpbPage,
+        itNodesMergeBegin,
+        itNodesMergeEnd,
+        itNodesDelBegin,
+        itNodesDelEnd,
+        linkIbMergeStart,
+        pcNodesMerged,
+        pcNodesDel );
+
+    // Copy root page (BBTBuffHeader + any data).
+    // cbBBTRoot is the size of the BBT buffer region on the base page, i.e. the span
+    // [ IbHeader(), IbPageDataEnd( 0 ) ).
+    memcpy(
+        PbPage( 0 ) + IbHeader(),
+        rgpbPage[ 0 ] + IbHeader(),
+        PFormat()->cbBBTRoot );
+
+    // The header was just replaced above, so le_ibMicFree now describes the new image.
+    // if the last node fits perfectly at the end of the last page, ibMicFree can point to the next page
+    int ipgLast = min( Cpg() - 1, IpgOffsetFromLink( m_pHeader->le_ibMicFree ).ipg );
+    for ( int i = 1; i <= ipgLast; i++ )
+    {
+        // Copy back the page at the appropriate offset
+        int cbCopy = IbPageDataEnd( i ) - IbPageDataBegin( i );
+        memcpy(
+            PbPage( i ) + IbPageDataBegin( i ),
+            rgpbPage[ i ] + IbPageDataBegin( i ),
+            cbCopy );
+    }
+
+    // UA_TODO: pattern-fill leftover space in pages
+
+    // Cleanup allocated memory
+    for ( int i = 0; i < Cpg(); i++ )
+    {
+        BFFree( rgpbPage[ i ] );
+    }
+}
+
+// Makes a copy of the current BBTBuff while deleting specified nodes and merging in as many nodes as can fit in.
+// Inputs:
+// 1. An array of BYTE* pointers pointing to allocated memory for each page in the BBTBuff.
+// 2. A LegacyForwardIterator specifying merged nodes. The iterator should return pointers to objects that can pass for a SkipListNode.
+// 3. A LegacyForwardIterator specifying deleted nodes. The iterator should return on-page addresses of nodes that should be deleted.
+// 4.
Offset in BBTBuff where the merged nodes are placed. +// +// Outputs: +// 1. Count of nodes merged (can be less than the input). +// 2. Count of nodes deleted. +template +void BBTBuff::CopyMergeAndDelNodes_( + BYTE** rgpbPage, + TMergeIt itNodesMergeBegin, + const TMergeIt itNodesMergeEnd, + TDelIt itNodesDelBegin, + const TDelIt itNodesDelEnd, + SkipListLink linkIbMergeStart, + _Out_ int* pcNodesMerged, + _Out_ int* pcNodesDel ) +{ + using TMergeNode = std::iterator_traits::value_type; + using TDelItValue = std::iterator_traits::value_type; + enum NodeSource { nsInvalid = 0, nsLocal, nsMerge }; + static_assert( std::is_same::value == true, "TDelIt must return SkipListLink" ); + + // The new pages being constructed have to be mem-copyable into the actual cpage. So we need + // to obey the current strucutre of the pages comprising the BBTBuff. + // This means that we need to construct the skip list to make sure that it begins at offsets + // dictated by the current root (for the root page). For the other pages, it needs to obey + // the current structure of the page, which for now is just leaving space for the page header. + + // Non-standard, should use std::distance() but requires RandomAccessIterator, + // that we don't want to support. + auto cNodesToMerge = itNodesMergeEnd - itNodesMergeBegin; + Assert( cNodesToMerge <= INT32_MAX ); + + const SkipListLinkArray rgLinksHead = RgSkipListLinksHead( m_pHeader ); + SkipListNode* rgpNodePrevAtLevel[ MAX_LEVELS ] = { 0 }; + TMergeNode pnodeMerge = ( itNodesMergeBegin != itNodesMergeEnd ? *itNodesMergeBegin : nullptr ); + SkipListLink linkDel = ( itNodesDelBegin != itNodesDelEnd ? 
*itNodesDelBegin : SkipListLink::Null() ); + SkipListLink linkLocal = rgLinksHead[ 0 ]; + SkipListNode* pnodeLocal = PnodeFromLink( linkLocal ); + SkipListNode* pnodePrevNew = NULL; + int cNodes = 0; + int iNodesMerge = 0; + int iNodesDel = 0; + int cbFree = CbMax() - sizeof( BBTBuffHeader ); + SkipListLink ibCurrLocal = SkipListLink::FromInt( sizeof( BBTBuffHeader ) ); // leave space for the header + SkipListLink ibCurrMerge = linkIbMergeStart; + SkipListLink linkPrev0( 0 ); + BBTBuffHeader* pHeader = (BBTBuffHeader*) ( rgpbPage[ 0 ] + IbHeader() ); + SkipListLinkArray rgLinksHeadNew = RgSkipListLinksHead( pHeader ); + + while( pnodeLocal != NULL || pnodeMerge != NULL ) + { + NodeSource nodeSrcCurr = nsInvalid; + SkipListLink* pibCurr = NULL; + if ( pnodeLocal != NULL && pnodeMerge != NULL ) + { + int result = pnodeLocal->CmpKey( pnodeMerge->Key() ); + if ( result <= 0 ) + { + nodeSrcCurr = nsLocal; + pibCurr = &ibCurrLocal; + } + else + { + // Evicted nodes from ancestors are more recent, must be inserted later than equal local nodes. + nodeSrcCurr = nsMerge; + pibCurr = &ibCurrMerge; + } + } + else if ( pnodeLocal != NULL ) + { + nodeSrcCurr = nsLocal; + pibCurr = &ibCurrLocal; + } + else + { + Assert( pnodeMerge != NULL ); + nodeSrcCurr = nsMerge; + pibCurr = &ibCurrMerge; + } + + // Skip if the current node matches a node in delete sequence + if ( nodeSrcCurr == nsMerge || linkLocal != linkDel ) + { + // Compute space needed for the current node + // + + // Re-level deterministically, we can because we are appending sequentially. + // Remove the effect of merged nodes on local node levels. This is required to generate local nodes + // with the same size as we calculated earlier and reserved ( ErrReorg_CalcIbMerge() ). + // Not doing so can result in overrunning our estimate, causing local nodes not to fit. + // UA_TODO: The skip list will not be optimally laid out. But I don't think it is a problem. + int level = GenLevel( nodeSrcCurr == nsLocal ? 
cNodes - iNodesMerge : cNodes ); + BBTBuffOpcode opcodeCurr = ( nodeSrcCurr == nsLocal ? pnodeLocal->Opcode() : pnodeMerge->Opcode() ); + KEY keyCurr = ( nodeSrcCurr == nsLocal ? pnodeLocal->Key() : pnodeMerge->Key() ); + DATA dataCurr = ( nodeSrcCurr == nsLocal ? pnodeLocal->Data() : pnodeMerge->Data() ); + int cbUsed = SkipListLink::Roundup( SkipListNode::Cb( level, keyCurr.Cb(), dataCurr.Cb() ) ); // count wasted space too + + if ( cbUsed > CbMaxNodeSize() ) + { + // Generate level 0 node to eliminate overhead as much as we can. + level = 0; + cbUsed = SkipListLink::Roundup( SkipListNode::Cb( level, keyCurr.Cb(), dataCurr.Cb() ) ); + EnforceSz( cbUsed <= CbMaxNodeSize(), "BBTBuff MergeNodeTooBig" ); // how was this node able to fit earlier? + } + + SkipListLink ibPrev = *pibCurr; // needed to calcualte wasted space at the end of a page + PageOffsetTuple pgOffsetCurr = IpgOffsetFromLink( *pibCurr ); + const int cbLeft = ( pgOffsetCurr.ipg < Cpg() ? IbPageDataEnd( pgOffsetCurr.ipg ) - pgOffsetCurr.ibOnPage : 0 ); + if ( cbUsed > cbLeft ) + { + // Move to new page if current page is full + pgOffsetCurr.ipg++; + if ( pgOffsetCurr.ipg >= Cpg() ) + { + if ( nodeSrcCurr == nsMerge ) + { + // Ran out of space, can't merge external nodes anymore. + // But we still have to merge all the local nodes. + // This will leave pseqNodesMerge positioned on the last node to fail merge, + // and then will proceed to merge in all local nodes. + pnodeMerge = NULL; + continue; // will try merging current pnodeLocal next + } + else + { + // This should never hit. We reserve the space needed to copy over local nodes, + // and only merge in as many external nodes as can fit. 
+ EnforceSz( false, "BBTBuff MergeEvictedOverflow" ); + } + } + + Assert( rgpbPage[ pgOffsetCurr.ipg ] != NULL ); + pgOffsetCurr.ibOnPage = (USHORT) IbPageDataBegin( pgOffsetCurr.ipg ); + *pibCurr = LinkFromIpgOffset( pgOffsetCurr.ipg, pgOffsetCurr.ibOnPage ); + } + + // Alloc and copy to new node + SkipListLink ibNew = *pibCurr; + pibCurr->Inc( cbUsed ); + cbFree -= ( pibCurr->ToInt() - ibPrev.ToInt() ); // count wasted space because of a page switch, too + + EnforceSz( ibCurrLocal.ToInt() <= linkIbMergeStart.ToInt(), "BBTBuff ReorgLocalNodeOverflow" ); + + SkipListNode* pnodeNew = SkipListNode::Create( rgpbPage[ pgOffsetCurr.ipg ] + pgOffsetCurr.ibOnPage, level, opcodeCurr, keyCurr.Cb(), dataCurr.Cb() ); + pnodeNew->SetNodeKey( keyCurr ); + pnodeNew->SetNodeData( dataCurr ); + pnodeNew->SetNodeFlags( nodeSrcCurr == nsLocal ? pnodeLocal->FFlags() : pnodeMerge->FFlags() ); + + // Recompute duplicate flag + Assert( linkPrev0.FNull() == ( pnodePrevNew == NULL ) ); + if ( pnodePrevNew != NULL ) + { + // Since we are only adding nodes, technically we can only recompute when FDuplicate() is false. + // But this can be broken if a caller combines duplicate nodes into 1, invalidating duplicate flags of the merge sequence. + // So always recompute. + Assert( !pnodePrevNew->FDuplicateNext0() ); + if ( pnodePrevNew->CmpKey( pnodeNew->Key() ) == 0 ) + { + pnodePrevNew->SetDuplicateNext0( true ); + } + } + + // Adjust links + pnodeNew->SetLinkPrev0( linkPrev0 ); + SkipListLinkArray rgLinksNew = pnodeNew->RgLinksNext(); + for ( int i = 0; i <= level; i++ ) + { + ( rgpNodePrevAtLevel[ i ] != NULL ? rgpNodePrevAtLevel[ i ]->RgLinksNext() : rgLinksHeadNew ) + .SetLink( i, ibNew ); + rgpNodePrevAtLevel[ i ] = pnodeNew; + } + + linkPrev0 = ibNew; + pnodePrevNew = pnodeNew; + cNodes++; + } + else + { + // Node should be deleted. + // Deleted node was identified by a link comparison. This works because nodes can't move around during evict-merge. 
+ // Local links/pointers only become invalid after the merge copies back data to BBTBuff. + Assert( nodeSrcCurr == nsLocal ); // Only local nodes allowed in del sequence. + ++iNodesDel; + ++itNodesDelBegin; + linkDel = ( itNodesDelBegin != itNodesDelEnd ? *itNodesDelBegin : SkipListLink::Null() ); + } + + // MoveNext + if ( nodeSrcCurr == nsLocal ) + { + linkLocal = pnodeLocal->LinkNext0(); + pnodeLocal = PnodeFromLink( pnodeLocal->LinkNext0() ); + } + else + { + Assert( nodeSrcCurr == nsMerge ); + ++iNodesMerge; + ++itNodesMergeBegin; + pnodeMerge = ( itNodesMergeBegin != itNodesMergeEnd ? *itNodesMergeBegin : nullptr ); + } + } + + Assert( linkDel.FNull() ); // all deleted nodes should've been matched + EnforceSz( cNodes == m_pHeader->le_cNodes + iNodesMerge - iNodesDel, "BBTBuffReorg MissingNodes" ); + EnforceSz( ibCurrMerge.ToInt() == CbMax() - cbFree, "BBTBuffReorg BadSpace" ); + + // Null-terminate all levels of the new skip list. + SkipListLink linkNull( 0 ); + for ( int i = 0; i < MAX_LEVELS; i++ ) + { + ( rgpNodePrevAtLevel[ i ] != NULL ? rgpNodePrevAtLevel[ i ]->RgLinksNext() : rgLinksHeadNew ) + .SetLink( i, linkNull ); + } + + // Fix the new header + pHeader->nVersion = m_pHeader->nVersion; + pHeader->cMaxPages = m_pHeader->cMaxPages; + pHeader->le_cNodes = cNodes; + pHeader->le_cbFree = cbFree; + pHeader->le_ibMicFree = ibCurrMerge; + + Assert( !rgLinksHeadNew[ 0 ].FNull() || cNodes == 0 ); + + // pHeader->rgSkipListLinksHead has already been fixed up + // We are done. New re-organized pages are in rgpbPage. + if ( pcNodesMerged != NULL ) + { + *pcNodesMerged = iNodesMerge; + } + if ( pcNodesDel != NULL ) + { + *pcNodesDel = iNodesDel; + } +} diff --git a/dev/ese/src/inc/bbtbuffwriter.hxx b/dev/ese/src/inc/bbtbuffwriter.hxx new file mode 100644 index 00000000..27345bba --- /dev/null +++ b/dev/ese/src/inc/bbtbuffwriter.hxx @@ -0,0 +1,775 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#pragma once +#include "bbtbuff.hxx" + +/************************************************************************************************************ +Provides a wrapper that allows transacted write operations on a BBTBuff. +IBBTBuffTrxLog defines the interface that can be implemented to capture the operations in a trx log. +The implementation of IBBTBuffTrxLog is responsible for correctly recording the details of the operation, +and ensuring durability/redo guarantees. + +BBTBuffWriter only guarantees that each write operation is atomic, and in case of a failure, +there will be no side-effects and the operation will be cleanly undone. + +Certain trx log concepts from the engine above have leaked into the BBTBuffWriter. +Following are managed here for the BBT buffer. +1. DBTIMEs and page dirtying. +************************************************************************************************************/ + +// Interface class for a transaction logger needed for write operations on a BBTBuff. +// Uses CRTP to do static polymorphism. 
+template +class IBBTBuffTrxLog +{ +public: + BBTBuff* const m_pbbtbuff; + DBTIME m_dbtimeCoordinated = dbtimeNil; + + IBBTBuffTrxLog( BBTBuff * pbbtbuff ) : m_pbbtbuff( pbbtbuff ) + {} + + ERR ErrLogInsert( + const BBTBuffChangeContext& changeCtx, + BBTBuffOpcode opcode, + const KEY& key, + const DATA& data, + SkipListNodeFlags flags ) + { + return static_cast( this )->ErrLogInsert( changeCtx, opcode, key, data, flags ); + } + + ERR ErrLogDelete( + const BBTBuffChangeContext& changeCtx, + const SkipListNode* pnodeDel ) + { + return static_cast( this )->ErrLogDelete( changeCtx, pnodeDel ); + } + + ERR ErrLogRangeDelete( + DBTIME dbtimeBefore, + DBTIME dbtimeCurr, + SkipListLink linkFirst, + int cNodes ) + { + return static_cast( this )->ErrLogRangeDelete( dbtimeBefore, dbtimeCurr, linkFirst, cNodes ); + } + + ERR ErrLogMergeAndDel( + DBTIME dbtimeBefore, + DBTIME dbtimeCurr, + _In_count_( cNodesMerged ) const SkipListNode** rgNodesMerged, + _In_count_( cNodesDel ) SkipListLink* rgLinksDel, + int cNodesMerged, + int cNodesDel, + SkipListLink linkIbMergeStart ) + { + return static_cast( this )->ErrLogMergeAndDel( dbtimeBefore, dbtimeCurr, rgNodesMerged, rgLinksDel, cNodesMerged, cNodesDel, linkIbMergeStart ); + } + +protected: + // BBTBuff interface exposed to actual implementations of IBBTBuffTrxLog + + int Cpg() const { return m_pbbtbuff->Cpg(); } + CPAGE& BBTBuffCpage( int ipg ) { return m_pbbtbuff->Pcsr( ipg )->Cpage(); } +}; + +// Helper class to manage CoordinatedDirty on a BBTBuff. +// If commit isn't called, the dbtimes will be reverted back by the destructor. 
+class DbTimeGuard +{ + BBTBuff* m_pbbtBuff; + DBTIME m_dbtimeBefore; + ULONG m_fPageFlags; + +public: + DbTimeGuard( BBTBuff* pbbtBuff ) : + m_pbbtBuff( pbbtBuff ), + m_dbtimeBefore( pbbtBuff->m_pcsrBase->Dbtime() ) + {} + + void CoordinatedDirty( DBTIME dbtime ) + { + m_fPageFlags = m_pbbtBuff->m_pcsrBase->Cpage().FFlags(); + Assert( ( m_fPageFlags & CPAGE::fPageBBTBuffRoot ) && ( m_fPageFlags & CPAGE::fPageBBTBuff ) ); + m_fPageFlags &= ( ~CPAGE::fPageBBTBuffRoot ); // Remove flag because it should only be present on the base page + + // Requires pageset to be setup correctly + dbtime == dbtimeNil ? m_pbbtBuff->m_pcsrBase->Dirty() : m_pbbtBuff->m_pcsrBase->CoordinatedDirty( dbtime ); + dbtime = m_pbbtBuff->m_pcsrBase->Dbtime(); + + for ( int i = 0; i < m_pbbtBuff->Cpg() - 1; i++ ) + { + Assert( m_fPageFlags == m_pbbtBuff->m_rgcsrLatched[ i ].Cpage().FFlags() ); + Assert( m_pbbtBuff->m_rgcsrLatched[ i ].Dbtime() == m_dbtimeBefore ); + m_pbbtBuff->m_rgcsrLatched[ i ].CoordinatedDirty( dbtime ); + } + } + + void Commit() { m_dbtimeBefore = dbtimeNil; } + ~DbTimeGuard() + { + if ( m_dbtimeBefore != dbtimeNil ) + { + // DbTimeGuard doesn't protect against page flag modifications. + // BBTBuff doesn't modify any page flags during its write operations. + + m_pbbtBuff->m_pcsrBase->RevertDbtime( m_dbtimeBefore, m_fPageFlags | CPAGE::fPageBBTBuffRoot ); + for ( int i = 0; i < m_pbbtBuff->Cpg() - 1; i++ ) + { + m_pbbtBuff->m_rgcsrLatched[ i ].RevertDbtime( m_dbtimeBefore, m_fPageFlags ); + } + } + } + + DBTIME DbtimeBefore() + { + Assert( m_dbtimeBefore >= dbtimeStart ); + return m_dbtimeBefore; + } + + DBTIME DbtimeNew() + { + Assert( m_dbtimeBefore >= dbtimeStart ); + return m_pbbtBuff->m_pcsrBase->Dbtime(); + } +}; + +// A simple iterator over a sequence of SkipListNode* +// The iterator starts positioned before the first element (like c# IEnumerator). Call Next() before calling Curr(). 
+// Provides a level of indirection to implement more complex iterators over a sequence of SkipListNode* +// (required by FT Split/Evict) +// can be generalized using standard c++ iterators and a type-erasing any_iterator +class INodeSequence +{ +public: + virtual const SkipListNode* Curr() = 0; + virtual bool Next() = 0; + virtual void Reset() = 0; +}; + +// A node sequence over a BBTBuff +class NodeSequenceBBTBuff : public INodeSequence +{ + BBTBuff& m_bbtbuff; + SkipListLink m_linkFirst; + BBTBuff::SeekFlags m_fSeekFlags; + bool m_fBeforeFirst; // BBTBuff doesn't support BeforeFirst cursor position + +public: + NodeSequenceBBTBuff( BBTBuff& bbtbuff, BBTBuff::SeekFlags fFlags ) : + m_bbtbuff( bbtbuff ), + m_linkFirst( bbtbuff.GetLinkToCurrNode() ), + m_fSeekFlags( fFlags ), + m_fBeforeFirst( true ) + {} + + virtual const SkipListNode* Curr() { return ( !m_fBeforeFirst ? m_bbtbuff.PnodeCurr() : NULL ); } + virtual bool Next() + { + if ( !m_fBeforeFirst ) + { + ERR err = m_bbtbuff.ErrMoveNext( m_fSeekFlags ); + if ( err != errBBTNodeNotFound ) + { + EnforceSz( err >= JET_errSuccess, "NodeSequenceBBTBuff_MoveNext" ); + return true; + } + else + { + return false; + } + } + else + { + m_fBeforeFirst = false; + return true; + } + } + + virtual void Reset() + { + ERR err = m_bbtbuff.ErrSetCurrNodeFromLink( m_linkFirst ); + EnforceSz( err >= JET_errSuccess, "NodeSequenceBBTBuff_Reset" ); + m_fBeforeFirst = true; + } +}; + +// Provides transacted writes on a BBTBuff. 
+template +class BBTBuffWriter +{ +private: + BBTBuff* m_pbbtbuff; + IBBTBuffTrxLog* m_pTrxLogger; + +public: + BBTBuffWriter( BBTBuff* pbbtbuff, IBBTBuffTrxLog* pTrxLogger ) : + m_pbbtbuff( pbbtbuff ), + m_pTrxLogger( pTrxLogger ) + { + Assert( pbbtbuff == pTrxLogger->m_pbbtbuff ); + } + +private: + void CoordinatedDirty(); + SkipListLink GetLinkNew( int cb ); + ERR ErrReorg_ProcessDeletes( + _In_count_( cLinksDel ) SkipListLink* rgLinksDel, + const int cLinksDel, + _Out_ SkipListLink* plinkIbMerge ); + +public: + ERR ErrUpgradeToWriteMode(); + ERR ErrInsert( BBTBuffOpcode opcode, const KEY& key, const DATA& data, SkipListNodeFlags flags ); + ERR ErrDelete( const KEY& key ); + ERR ErrFlagDelete( SkipListLink link ); + ERR ErrReorganize(); + + + // APIs for Evict + + ERR ErrRangeDelete( SkipListLink linkFirst, int cNodes ); // deletes a range of nodes defined by link, count + + // Merges in the given sequence of nodes and deleting the nodes in the delete sequence, while potentially reorganizing the buffer if needed. 
+ ERR ErrMergeAndDelNodes( + _In_count_( cNodesMerge ) const SkipListNode** rgNodesMerge, + _In_count_( cNodesToDel ) SkipListLink* rgLinksDel, + const int cNodesMerge, + const int cNodesToDel, + _Out_ int* pcNodesMerged, + _Out_ int* pcNodesDel, + int cbReorgThreshold ); +}; + +// Uses dbtime of the root to dirty the rest of the pages +template +void BBTBuffWriter::CoordinatedDirty() +{ + // All pages should be write-latched and root already dirtied + Assert( m_pbbtbuff->m_pcsrBase->FDirty() ); + DBTIME dbtime = m_pbbtbuff->m_pcsrBase->Dbtime(); + + for ( int i = 0; i < m_pbbtbuff->Cpg() - 1; i++ ) + { + m_pbbtbuff->m_rgcsrLatched[ i ].CoordinatedDirty( dbtime, bfdfDirty ); + } +} + +template +ERR BBTBuffWriter::ErrUpgradeToWriteMode() +{ + ERR err = JET_errSuccess; + + Assert( m_pbbtbuff->m_pcsrBase->FLatched() ); + m_pbbtbuff->m_latchType = m_pbbtbuff->m_pcsrBase->Latch(); + + for ( int i = 0; i < m_pbbtbuff->Cpg(); i++ ) + { + CSR* pcsr = m_pbbtbuff->Pcsr( i ); + LATCH latchCurr = pcsr->Latch(); + if ( latchCurr == latchReadTouch || latchCurr == latchReadNoTouch ) + { + err = pcsr->ErrUpgradeFromReadLatch(); + if ( err < JET_errSuccess ) + { + // we lose our latch if the upgrade fails + // BBTBuff is unusable and we must release all latches. 
+ for ( int j = 0; j < m_pbbtbuff->Cpg(); j++ ) + { + m_pbbtbuff->Pcsr( j )->ReleasePage(); + } + + m_pbbtbuff->Unload(); + return err; + } + } + else if ( latchCurr == latchRIW ) + { + pcsr->UpgradeFromRIWLatch(); + } + else + { + Assert( latchCurr == latchNone || latchCurr == latchWrite ); + } + } + + m_pbbtbuff->m_latchType = latchWrite; + return err; +} + +template +SkipListLink BBTBuffWriter::GetLinkNew( int cb ) +{ + Assert( latchWrite == m_pbbtbuff->m_pcsrBase->Latch() ); + + BBTBuffHeader* pHeader = m_pbbtbuff->m_pHeader; + SkipListLink ibMicFree = pHeader->le_ibMicFree; + PageOffsetTuple pgOffset = m_pbbtbuff->IpgOffsetFromLink( pHeader->le_ibMicFree ); + int cbUsed = SkipListLink::Roundup( cb ); // count wasted space too + Assert( cbUsed >= cb ); + + if ( cbUsed > m_pbbtbuff->IbPageDataEnd( pgOffset.ipg ) - pgOffset.ibOnPage ) + { + if ( pgOffset.ipg < m_pbbtbuff->Cpg() - 1 ) + { + pgOffset.ipg++; + ibMicFree = m_pbbtbuff->LinkFromIpgOffset( pgOffset.ipg, m_pbbtbuff->IbPageDataBegin( pgOffset.ipg ) ); + } + else + { + // Caller should have checked for free space before calling this function + EnforceSz( false, "BBTBuff: errBBTBuffFull" ); + } + } + + // Caller should ensure that the page referenced by the returned link is latched + return ibMicFree; +} + +// Inserts a new node into the list. +// Duplicate nodes are inserted at the tail of a duplicate sequence. 
+template +ERR BBTBuffWriter::ErrInsert( BBTBuffOpcode opcode, const KEY& key, const DATA& data, SkipListNodeFlags flags ) +{ + ERR err = JET_errSuccess; + + m_pbbtbuff->ResetCurr(); + Call( m_pbbtbuff->ErrWriteLatchAll() ); + + // Check/prepare for the insertion + // All modifications are deferred until we know that insert can succeed unconditionally + { + BBTBuffHeader* pHeader = m_pbbtbuff->m_pHeader; + BBTBuffChangeContext changeCtx{}; // zero-initializes + int level = m_pbbtbuff->GenLevel(); + int cbNode = SkipListNode::Cb( level, key.Cb(), data.Cb() ); + + // Check for space and max supported node size. + if ( cbNode > m_pbbtbuff->CbMax() - pHeader->le_ibMicFree->ToInt() || + cbNode > m_pbbtbuff->CbMaxNodeSize() ) + { + // Check again with level 0 node. + level = 0; + cbNode = SkipListNode::Cb( level, key.Cb(), data.Cb() ); + EnforceSz( cbNode <= m_pbbtbuff->CbMaxNodeSize(), "BBTBuff MaxNodeSizeExceeded" ); + + if ( cbNode > m_pbbtbuff->CbMax() - pHeader->le_ibMicFree->ToInt() ) + { + Call( ErrERRCheck( errBBTBuffFull ) ); + } + } + + int result; + SkipListNode* rgNodes[ MAX_LEVELS ]; + Call( m_pbbtbuff->ErrSeek_( key, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Curr, &result, changeCtx.rgLinksPrev, rgNodes ) ); + bool fDuplicate = ( result == 0 ); + + // ErrSeek_() gives us the links to each node at every level at the insertion point. + // To insert, we only need to fix the links that are at the same level or below as the newly inserted node. + + // Get node next to the insertion point at level 0 + // This node's m_linkPrev0 pointer needs to be modified + SkipListLink linkNext0 = ( rgNodes[ 0 ] ? 
rgNodes[ 0 ]->LinkNext0() : RgSkipListLinksHead( pHeader )[ 0 ] ); + + SkipListLink linkNew = GetLinkNew( cbNode ); + changeCtx.linkCurr = linkNew; + changeCtx.level = level; + + // All preparation/checks succeeded + // LOG the insert operation + { + DbTimeGuard dbtimeGuard( m_pbbtbuff ); + dbtimeGuard.CoordinatedDirty( m_pTrxLogger->m_dbtimeCoordinated ); + changeCtx.dbtimeBefore = dbtimeGuard.DbtimeBefore(); + changeCtx.dbtimeCurr = dbtimeGuard.DbtimeNew(); + + Call( m_pTrxLogger->ErrLogInsert( changeCtx, opcode, key, data, flags ) ); + dbtimeGuard.Commit(); + } + + // WARNING: Can't fail after this point !!! + SkipListNode* pnodeNew = m_pbbtbuff->PnodeInsert_( changeCtx, rgNodes, opcode, key, data, flags, fDuplicate ); + + PageOffsetTuple pgOffsetNew = m_pbbtbuff->IpgOffsetFromLink( linkNew ); + m_pbbtbuff->ChangeCurr( pgOffsetNew.ipg, pnodeNew ); + Assert( linkNew == m_pbbtbuff->GetLinkToCurrNode() ); // sanity check + } + +HandleError: + m_pbbtbuff->DowngradeLatches(); + return err; +} + +template +ERR BBTBuffWriter::ErrDelete( const KEY& key ) +{ + ERR err = JET_errSuccess; + + m_pbbtbuff->ResetCurr(); + Call( m_pbbtbuff->ErrWriteLatchAll() ); + + // Check/Prepare for deletion + // All modifications are deferred until we know that delete can succeed unconditionally + { + int result; + BBTBuffChangeContext changeCtx{}; // zero-initializes + SkipListNode* rgNodes[ MAX_LEVELS ]; + BBTBuffHeader* pHeader = m_pbbtbuff->m_pHeader; + Call( m_pbbtbuff->ErrSeek_( key, BBTBuff::SeekMode::LEQ, BBTBuff::SeekPos::Prev, &result, changeCtx.rgLinksPrev, rgNodes ) ); + + // ErrSeek_() will position at the node prev to the node to be deleted. + // result will tell us if the next node is an exact match or not. + + if ( result == 0 ) + { + // Get link to the node to delete. + SkipListLink linkDel = !changeCtx.rgLinksPrev[ 0 ].FNull() ? 
rgNodes[ 0 ]->LinkNext0() : RgSkipListLinksHead( pHeader )[ 0 ]; + SkipListNode* pnodeToDelete = m_pbbtbuff->PnodeFromLink( linkDel ); + Assert( pnodeToDelete != NULL ); + Assert( pnodeToDelete->CmpKey( key ) == 0 ); + + changeCtx.linkCurr = linkDel; + changeCtx.level = pnodeToDelete->Level(); + + // The seek loop above guarantees that we always land at the latest node in a duplicate sequence. + // Enforce that. Deleting a node that isn't the latest duplicate version will cause corruption ! + Call( m_pbbtbuff->ErrAssertIsLatestInDuplicateSequence( pnodeToDelete ) ); + EnforceSz( !pnodeToDelete->FDuplicateNext0(), "BBTBuff::Delete_BadDupFlag" ); + + // All preparation/checks succeeded + // LOG delete operation + { + DbTimeGuard dbtimeGuard( m_pbbtbuff ); + dbtimeGuard.CoordinatedDirty( m_pTrxLogger->m_dbtimeCoordinated ); + changeCtx.dbtimeBefore = dbtimeGuard.DbtimeBefore(); + changeCtx.dbtimeCurr = dbtimeGuard.DbtimeNew(); + + Call( m_pTrxLogger->ErrLogDelete( changeCtx, pnodeToDelete ) ); + dbtimeGuard.Commit(); + } + + // WARNING: Can't fail after this point !!! 
+ m_pbbtbuff->Delete_( changeCtx, rgNodes, pnodeToDelete ); + } + else + { + err = ErrERRCheck( errBBTNodeNotFound ); + } + } + +HandleError: + m_pbbtbuff->DowngradeLatches(); + return err; +} + +template +ERR BBTBuffWriter::ErrFlagDelete( SkipListLink link ) +{ + ERR err = JET_errSuccess; + Call( m_pbbtbuff->ErrWriteLatchAll() ); + m_pbbtbuff->m_pcsrBase->Dirty(); + CoordinatedDirty(); + + SkipListNode* pnode = m_pbbtbuff->PnodeFromLink( link ); + pnode->SetDeleted( true ); + +HandleError: + m_pbbtbuff->DowngradeLatches(); + return err; +} + +template +ERR BBTBuffWriter::ErrReorganize() +{ + int cNodesMerged = 0; + int cNodesDel = 0; + ERR err = ErrMergeAndDelNodes( NULL, NULL, 0, 0, &cNodesMerged, &cNodesDel, -m_pbbtbuff->CbMax() ); // always reorg + Assert( cNodesMerged == 0 ); + Assert( cNodesDel == 0 ); + return err; +} + +template +ERR BBTBuffWriter::ErrRangeDelete( SkipListLink linkFirst, int cNodes ) +{ + ERR err = JET_errSuccess; + + m_pbbtbuff->AssertLatchedAll( latchWrite ); // not dirty yet + DbTimeGuard dbtimeGuard( m_pbbtbuff ); + dbtimeGuard.CoordinatedDirty( m_pTrxLogger->m_dbtimeCoordinated ); + DBTIME dbtimeBefore = dbtimeGuard.DbtimeBefore(); + DBTIME dbtimeCurr = dbtimeGuard.DbtimeNew(); + + CallR( m_pTrxLogger->ErrLogRangeDelete( dbtimeBefore, dbtimeCurr, linkFirst, cNodes ) ); + dbtimeGuard.Commit(); + + m_pbbtbuff->RangeDelete_( linkFirst, cNodes ); + return err; +} + +// Returns the cost of doing a full reorg relative to doing inplace operations. +// where cNodesTotal is the total nodes in a BBTBuff, +// and the return value is the number of nodes to inplace delete/insert to match that cost. +// For example, for input -> out +// 0 -> 0 +// 100 -> 15 +// 500 -> 55 +// 1000 -> 100 +// 2500 -> 221 +// 5000 -> 406 +INLINE int CalcSkipListMergeHeuristic( int n ) +{ + // Skiplist has avg insert/delete performance of log2(n) + // Each reorg requires individually visiting every node and copying it (n operations). 
+ // Then copying back the reorg-ed buffers. + // Assume that the nodes are small enough that cost of copying is negligible compared to visiting the node. + // So if n = log2(n) * cOPs (where cOPs = the number of inplace ops). + // return cOps = n / log2(n) + const double log10Of2 = 0.30102999566398119521373889472449; + return n > 0 ? static_cast( n / ( log10( n ) / log10Of2 ) ) : 0; +} + +// Reorganize and merge: Merges incoming nodes in pseqNodesMerge while deleting local nodes specified by pseqNodesDel. +// 1. Makes a logical copy of the list by allocating new page sized buffers on the heap +// and copying over all the nodes. +// - If copying is impossible or inefficient, does an inplace merge-delete. +// - Caller can specify cbReorgthreshold to force a reorg if doing a reorg would return atleast that much empty space. +// 2. Merges nodes in pseqNodesMerge into the copied list. Merge sequence must be sorted. +// 3. Removes any local nodes that are in the delete sequence. Del sequence must be sorted. +// 4. The new pages are mem-copied back into the cpage buffers. +// 5. The BBTBuff header is adjusted to reflect the new reality, in the end. 
// (Template parameter lists in this section were stripped during extraction; TTrxLog is a
// reconstructed name - NOTE(review): confirm against the original header.)
//
// Parameters:
//   rgNodesMerge / cNodesMerge - external nodes to merge in.
//   rgLinksDel / cNodesToDel   - links of the local nodes to delete.
//   pcNodesMerged, pcNodesDel  - receive the counts actually merged / deleted (both required).
//   cbReorgThreshold           - compared against the space a reorg would gain.
// Returns:
//   errBBTBuffFull        - the existing nodes don't fit after re-leveling; nothing was changed.
//   wrnBBTMergeTargetFull - fewer than cNodesMerge nodes could be merged.
template <class TTrxLog>
ERR BBTBuffWriter<TTrxLog>::ErrMergeAndDelNodes(
    _In_count_( cNodesMerge ) const SkipListNode** rgNodesMerge,
    _In_count_( cNodesToDel ) SkipListLink* rgLinksDel,
    const int cNodesMerge,
    const int cNodesToDel,
    _Out_ int* pcNodesMerged,
    _Out_ int* pcNodesDel,
    int cbReorgThreshold )
{
    Assert( pcNodesMerged != NULL );
    Assert( pcNodesDel != NULL );
    m_pbbtbuff->AssertLatchedAll( latchWrite ); // not dirty yet
    m_pbbtbuff->ResetCurr();

    // Every local that is still in scope at HandleError is declared before the first
    // Call()/goto, so that no jump bypasses an initialization.
    ERR err = JET_errSuccess;
    BYTE** rgpbPage = NULL;
    SkipListLink linkIbMergeStart( 0 );
    const int cMaxPages = m_pbbtbuff->Cpg();
    int cNodesMerged = 0;
    int cNodesDeleted = 0;

    Call( m_pbbtbuff->ErrWriteLatchAll() );

    err = ErrReorg_ProcessDeletes( rgLinksDel, cNodesToDel, &linkIbMergeStart );

    if ( err == errBBTBuffFull )
    {
        // Reorg can't be done because existing data expanded because of skiplist leveling
        // and there weren't enough deleted nodes to cover the difference.
        *pcNodesDel = 0;
        *pcNodesMerged = 0;
        goto HandleError;
    }

    {
        const int cbReorgGained = m_pbbtbuff->m_pHeader->le_ibMicFree->ToInt() - linkIbMergeStart.ToInt(); // can be negative
        if ( cbReorgGained < cbReorgThreshold )
        {
            const int heuristic = CalcSkipListMergeHeuristic( m_pbbtbuff->CNodes() );
            if ( cNodesToDel < heuristic )
            {
                // We may be doing too little to justify a full reorg.
                // Calculate approx count of merges too.
                int cbLeftReorg = m_pbbtbuff->CbMax() - linkIbMergeStart.ToInt();
                int cbLeftNoReorg = m_pbbtbuff->CbMax() - m_pbbtbuff->m_pHeader->le_ibMicFree->ToInt();
                int cOpsReorg = cNodesToDel;
                int cOpsNoReorg = cNodesToDel;

                for ( int i = 0; i < cNodesMerge; i++ )
                {
                    // Merge count is calculated off of cbLeft after reorg.
                    // It should be close to current cbFree (assuming we are doing few deletes, thats why we are here).
                    // In which case it doesn't matter much, or if we did delete some large node, then we assume that
                    // most nodes are small and cOpsExpected will climb higher than the heuristic, skipping in-place merge.
                    const SkipListNode* pnodeMerge = rgNodesMerge[ i ];
                    const int cb = SkipListNode::Cb( cOpsReorg, pnodeMerge->CbKey(), pnodeMerge->CbData() );
                    cbLeftReorg -= cb;
                    cbLeftNoReorg -= cb;

                    if ( cbLeftNoReorg >= 0 )
                    {
                        cOpsNoReorg++;
                    }

                    if ( cbLeftReorg < 0 || cOpsReorg > heuristic ) // count 1 over the heuristic
                    {
                        break;
                    }

                    cOpsReorg++;
                }

                // UA_TODO: this works well for small sized nodes. Too much variance in node sizes, or large nodes will cause problems.
                // For example, lets say we delete 1 large node on a full bbtbuff, and try to merge in a few small nodes.
                // The merge might not be possible without reorg, but we will try to do an inplace merge.
                // This at least makes forward progress (the large node will be deleted, but no merges will be done).
                // The next evict will have more forward progress.

                // UA_TODO: Enable code below. Needs logging support for in-place merge and del.
                //if ( cOpsReorg < heuristic && cOpsNoReorg == cOpsReorg )
                //{
                //  // If we are doing a small number of merge and deletes then use in-place merge,
                //  // and we can do the same number of operations with a reorg.
                //  Call( ErrInplaceMergeAndDelNodes( rgNodesMerge, rgNodesDel, cNodesMerge, cNodesDel, pcNodesMerged, pcNodesDel ) );
                //  goto HandleError;
                //}
            }
        }
    }

    Alloc( rgpbPage = (BYTE**) _alloca( sizeof(BYTE*) * cMaxPages ) );

    // _alloca() memory is uninitialized. Clear the table so that, if an allocation below fails,
    // the cleanup at HandleError only sees NULL or valid buffers.
    memset( rgpbPage, 0, sizeof(BYTE*) * cMaxPages );

    for ( int i = 0; i < cMaxPages; i++ )
    {
        BFAlloc( bfasTemporary, (void**) &rgpbPage[ i ], m_pbbtbuff->m_pcsrBase->Cpage().CbPage() );
        Alloc( rgpbPage[ i ] );
    }

    m_pbbtbuff->CopyMergeAndDelNodes_(
        rgpbPage,
        rgNodesMerge,
        rgNodesMerge + cNodesMerge,
        rgLinksDel,
        rgLinksDel + cNodesToDel,
        linkIbMergeStart,
        &cNodesMerged,
        &cNodesDeleted );

    // The skip list has been copied and reorganized
    // Dirty all pages to copy back to the original
    // WARNING: All local node pointers (e.g. in the merge, del sequences) are invalid after this point.
    // LOG the MergeAndDel operation
    {
        // If logging fails, Call() leaves this scope and the guard reverts the dbtimes.
        DbTimeGuard dbtimeGuard( m_pbbtbuff );
        dbtimeGuard.CoordinatedDirty( m_pTrxLogger->m_dbtimeCoordinated );
        DBTIME dbtimeBefore = dbtimeGuard.DbtimeBefore();
        DBTIME dbtimeCurr = dbtimeGuard.DbtimeNew();

        Call( m_pTrxLogger->ErrLogMergeAndDel(
                    dbtimeBefore,
                    dbtimeCurr,
                    rgNodesMerge,
                    rgLinksDel,
                    cNodesMerged,
                    cNodesDeleted,
                    linkIbMergeStart ) );
        dbtimeGuard.Commit();
    }

    // WARNING: Can't fail after this point !!!
    *pcNodesMerged = cNodesMerged;
    *pcNodesDel = cNodesDeleted;

    // Copy root page (BBTBuffHeader + any data)
    memcpy(
        m_pbbtbuff->PbPage( 0 ) + m_pbbtbuff->IbHeader(),
        rgpbPage[ 0 ] + m_pbbtbuff->IbHeader(),
        m_pbbtbuff->PFormat()->cbBBTRoot );

    {
        // if the last node fits perfectly at the end of the last page, ibMicFree can point to the next page
        const int ipgLast = min( cMaxPages - 1, m_pbbtbuff->IpgOffsetFromLink( m_pbbtbuff->m_pHeader->le_ibMicFree ).ipg );
        for ( int i = 1; i <= ipgLast; i++ )
        {
            // Copy back the page at the appropriate offset
            int cbCopy = m_pbbtbuff->IbPageDataEnd( i ) - m_pbbtbuff->IbPageDataBegin( i );
            memcpy(
                m_pbbtbuff->PbPage( i ) + m_pbbtbuff->IbPageDataBegin( i ),
                rgpbPage[ i ] + m_pbbtbuff->IbPageDataBegin( i ),
                cbCopy );
        }
    }

    // UA_TODO: pattern-fill leftover pages

    // Return a warning if we couldn't merge in all of the external nodes.
    if ( *pcNodesMerged < cNodesMerge )
    {
        err = ErrERRCheck( wrnBBTMergeTargetFull );
    }

HandleError:
    // Cleanup allocated memory
    if ( rgpbPage )
    {
        for ( int i = 0; i < cMaxPages; i++ )
        {
            if ( rgpbPage[ i ] != NULL )
            {
                BFFree( rgpbPage[ i ] );
            }
        }
    }

    return err;
}

// Dry run of a reorg: walks the level 0 list, skips the nodes listed in rgLinksDel, and
// computes where the surviving nodes would end once re-leveled and packed page by page.
//
// Parameters:
//   rgLinksDel / cLinksDel - links of the nodes to delete, in list order.
//   plinkIbMerge           - receives the offset just past the last surviving node, i.e. where
//                            merged nodes can start. Only written on success.
// Returns errBBTBuffFull when the surviving nodes alone no longer fit.
template <class TTrxLog>
ERR BBTBuffWriter<TTrxLog>::ErrReorg_ProcessDeletes(
    _In_count_( cLinksDel ) SkipListLink* rgLinksDel,
    const int cLinksDel,
    _Out_ SkipListLink* plinkIbMerge )
{
    ERR err = JET_errSuccess;
    BBTBuffHeader* pHeader = m_pbbtbuff->m_pHeader;
    SkipListNode* pnodeCurr;
    SkipListLink linkDelCurr = ( cLinksDel > 0 ? rgLinksDel[ 0 ] : SkipListLink::Null() );
    SkipListLink linkCurr = RgSkipListLinksHead( pHeader )[ 0 ];
    SkipListLink ibCurr = SkipListLink::FromInt( sizeof( BBTBuffHeader ) ); // leave space for the header
    int cNodesLeft = 0;
    int iLinkDel = 0;

    Assert( latchWrite == m_pbbtbuff->m_pcsrBase->Latch() );

    while ( !linkCurr.FNull() )
    {
        pnodeCurr = m_pbbtbuff->PnodeFromLink( linkCurr );
        if ( linkCurr != linkDelCurr )
        {
            const int level = m_pbbtbuff->GenLevel( cNodesLeft );   // Re-level deterministically because we know the node count, and we are appending sequentially
            const int cb = SkipListNode::Cb( level, pnodeCurr->CbKey(), pnodeCurr->CbData() );
            const int cbUsed = SkipListLink::Roundup( cb ); // count wasted space too

            PageOffsetTuple pgOffsetCurr = m_pbbtbuff->IpgOffsetFromLink( ibCurr );
            const int cbLeft = ( pgOffsetCurr.ipg < m_pbbtbuff->Cpg() ? m_pbbtbuff->IbPageDataEnd( pgOffsetCurr.ipg ) - pgOffsetCurr.ibOnPage : 0 );
            if ( cbUsed > cbLeft )
            {
                pgOffsetCurr.ipg++;
                if ( pgOffsetCurr.ipg >= m_pbbtbuff->Cpg() )
                {
                    // We haven't added any extra nodes. But overflowed because of different skiplist leveling.
                    Call( ErrERRCheck( errBBTBuffFull ) );
                }

                pgOffsetCurr.ibOnPage = (USHORT) m_pbbtbuff->IbPageDataBegin( pgOffsetCurr.ipg );
                ibCurr = m_pbbtbuff->LinkFromIpgOffset( pgOffsetCurr.ipg, pgOffsetCurr.ibOnPage );
            }

            ibCurr.Inc( cbUsed );
            cNodesLeft++;
        }
        else
        {
            iLinkDel++;
            linkDelCurr = ( iLinkDel < cLinksDel ? rgLinksDel[ iLinkDel ] : SkipListLink::Null() );
        }

        linkCurr = pnodeCurr->LinkNext0();
    }

    Assert( linkDelCurr.FNull() );  // all deleted nodes should've been matched
    Assert( iLinkDel == cLinksDel );

    // When we reorg, node sizes may increase (because of different skiplist leveling).
    Assert( cNodesLeft == pHeader->le_cNodes - iLinkDel );
    *plinkIbMerge = ibCurr;

HandleError:
    return err;
}
diff --git a/dev/ese/src/inc/ccsr.hxx b/dev/ese/src/inc/ccsr.hxx index 40240e8c..b08228e6 100644 --- a/dev/ese/src/inc/ccsr.hxx +++ b/dev/ese/src/inc/ccsr.hxx @@ -58,7 +58,6 @@ class CSR DBTIME Dbtime( ) const; VOID SetDbtime( const DBTIME dbtime ); VOID RevertDbtime( const DBTIME dbtime, const ULONG fFlags ); - VOID RestoreDbtime( const DBTIME dbtime, const BOOL fPageFDPDeleteBefore ); BOOL FLatched( ) const; LATCH Latch( ) const; PGNO Pgno( ) const; @@ -237,9 +236,14 @@ class CSR const VOID * PvBufferForCrashDump() { return m_cpage.PvBuffer(); } #ifdef DEBUGGER_EXTENSION + VOID LoadDehydratedPage( const IFMP ifmp, const PGNO pgno, VOID* const pv, const ULONG cb, const ULONG cbPage ); VOID Dump( CPRINTF * pcprintf, DWORD_PTR dwOffset = 0 ) const; #endif // DEBUGGER_EXTENSION +#ifdef ENABLE_JET_UNIT_TEST + VOID LoadNewTestPage( const ULONG cb, const IFMP ifmp = ifmpNil, const PGNO pgno = 42 ); +#endif // ENABLE_JET_UNIT_TEST + private: CSR( const CSR& ); // not defines @@ -424,11 +428,11 @@ VOID CSR::SetDbtime( const DBTIME dbtime ) } INLINE -VOID CSR::OverrideDbtime_( const DBTIME dbtime, const ULONG fFlags ) +VOID CSR::RevertDbtime( const DBTIME dbtime, const ULONG fFlags ) { ASSERT_VALID( this ); Assert( FDirty() ); - + Assert( Latch() == latchWrite ); Assert( dbtime <= m_dbtimeSeen ); @@ -438,21 +442,6 @@ VOID CSR::OverrideDbtime_( const DBTIME dbtime, const ULONG fFlags ) Assert( dbtime == m_cpage.Dbtime() ); } -INLINE -VOID CSR::RevertDbtime( const DBTIME dbtime, const ULONG fFlags ) -{ - Assert( PinstFromIfmp( m_cpage.Ifmp() )->m_plog->FRecoveringMode() != fRecoveringRedo ); // redo gets all preconditions done, and never has to revert.
- OverrideDbtime_( dbtime, fFlags ); -} - -INLINE -VOID CSR::RestoreDbtime( const DBTIME dbtime, const BOOL fPageFDPDeleteBefore ) -{ - Assert( m_cpage.FLoadedPage() ); - Assert( PinstFromIfmp( m_cpage.Ifmp() )->m_plog->FRecoveringMode() == fRecoveringRedo ); - OverrideDbtime_( dbtime, m_cpage.FFlags() | ( fPageFDPDeleteBefore ? CPAGE::fPageFDPDelete : 0 ) ); -} - #ifdef DEBUG @@ -811,6 +800,26 @@ INLINE ERR CSR::ErrLoadPage( } +#ifdef ENABLE_JET_UNIT_TEST +INLINE VOID CSR::LoadNewTestPage( const ULONG cb, const IFMP ifmp /* = ifmpNil */, const PGNO pgno /* = 42 */ ) +{ + ASSERT_VALID( this ); + Assert( m_latch == latchNone ); + + m_cpage.LoadNewTestPage( cb, ifmp, pgno ); + + // set members + m_pgno = m_cpage.PgnoThis(); + m_cpage.SetDbtime( dbtimeStart ); + m_dbtimeSeen = m_cpage.Dbtime(); + m_latch = latchWrite; + m_pagetrimState = pagetrimNormal; + + Assert( m_dbtimeSeen == m_cpage.Dbtime() ); +} +#endif // ENABLE_JET_UNIT_TEST + + INLINE VOID CSR::CopyPage( const VOID* pvPage, const ULONG cbPage ) { ASSERT_VALID( this ); @@ -1168,3 +1177,35 @@ VOID CSR::Reset() m_dbtimeSeen = dbtimeNil; m_pagetrimState = pagetrimNormal; } + +// Variable sized CSR array. +// Use with alloca to create a CSR array on stack. +// Automatically releases any held latches. +template +class CSRArray : public FixedArray +{ +public: + using FixedArray::FixedArray; // inherit base constructor + + // Copy and move constructors can't be inherited. 
+ CSRArray() = default; + CSRArray( const FixedArray& rhs ) : FixedArray( rhs ) {} + CSRArray( CSRArray&& rhs ) : FixedArray( std::move( rhs ) ) {} + CSRArray( FixedArray&& rhs ) : FixedArray( std::move( rhs ) ) {} // both are needed because a base rvalue-reference can't bind to a derived type + + const CSRArray& operator=( CSRArray&& rhs ) { return static_cast( FixedArray::operator=( std::move( rhs ) ) ); } + const CSRArray& operator=( FixedArray&& rhs ) { return static_cast( FixedArray::operator=( std::move( rhs ) ) ); } + ~CSRArray() + { + if ( m_fOwnsArray ) + { + for ( int i = 0; i < CItems(); i++ ) + { + ( *this )[ i ].ReleasePage(); + } + } + } +}; + +using CSRStackArray = CSRArray; +using CSRHeapArray = CSRArray; diff --git a/dev/ese/src/inc/cpage.hxx b/dev/ese/src/inc/cpage.hxx index 7e6892cd..cd988396 100644 --- a/dev/ese/src/inc/cpage.hxx +++ b/dev/ese/src/inc/cpage.hxx @@ -334,7 +334,7 @@ class CPAGE const ULONG cb ); #ifdef ENABLE_JET_UNIT_TEST - VOID LoadNewTestPage( _In_ const ULONG cb, _In_ const IFMP ifmp = ifmpNil ); + VOID LoadNewTestPage( _In_ const ULONG cb, _In_ const IFMP ifmp = ifmpNil, const PGNO pgno = 42 ); #endif // ENABLE_JET_UNIT_TEST VOID LoadPage( const IFMP ifmp, const PGNO pgno, VOID * const pv, const ULONG cb ); @@ -387,6 +387,7 @@ class CPAGE template< PageNodeBoundsChecking pgnbc = pgnbcNoChecks > VOID GetPtrReservedTag ( INT itag, LINE* pline, _Out_opt_ ERR* perrNoEnforce = NULL ) const; VOID ReplaceReservedTag ( INT itag, const DATA* rgdata, INT cdata ); + VOID ResetReservedTag ( INT itag, INT cb, BYTE fill ); template< PageNodeBoundsChecking pgnbc = pgnbcNoChecks > VOID GetPtr ( INT iline, LINE * pline, _Out_opt_ ERR * perrNoEnforce = NULL ) const; @@ -419,6 +420,8 @@ class CPAGE BOOL FEmptyPage ( ) const; BOOL FPreInitPage ( ) const; BOOL FParentOfLeaf ( ) const; + BOOL FBBTBuffRootPage( ) const; + BOOL FBBTBuffPage ( ) const; BOOL FSpaceTree ( ) const; BOOL FScrubbed ( ) const; @@ -464,7 +467,7 @@ class CPAGE VOID 
SetPgnoNext ( PGNO pgno ); VOID SetPgnoPrev ( PGNO pgno ); VOID SetDbtime ( const DBTIME dbtime ); - VOID RevertDbtime ( const DBTIME dbtime, const ULONG fFlags ); + VOID RevertDbtime( const DBTIME dbtime, const ULONG fFlags ); VOID SetFlags ( ULONG fFlags ); VOID ResetParentOfLeaf ( ); VOID SetFEmpty ( ); @@ -676,6 +679,8 @@ class CPAGE enum : ULONG { fPageRoot = 0x0001 }; enum : ULONG { fPageLeaf = 0x0002 }; enum : ULONG { fPageParentOfLeaf = 0x0004 }; + enum : ULONG { fPageBBTBuffRoot = 0x0100 }; // reuse SLVAvail + enum : ULONG { fPageBBTBuff = 0x0200 }; // reuse SLVOwnerMap // special flags enum : ULONG { fPageEmpty = 0x0008 }; @@ -1067,6 +1072,9 @@ class CPAGE FLAG32 m_reservedTestFlags : 8; // Bits reserved for test #endif // Add non-test flags here. + FLAG32 m_fPageScrubbedPrev : 1; // On dirty, fPageScrubbed flag is reset. But if we revert the dirty, the fPageScrubbed must be restored. + // This bit stores the previous value of the flag, when Dirty() is called. + FLAG32 m_fPageScrubbedPrevSet : 1; // Set if fPageScrubbed state has been captured. Used to prevent overwriting the state in case of multiple dirties. 
}; FLAG32 m_fRuntimeFlags; }; @@ -1309,6 +1317,27 @@ INLINE BOOL CPAGE::FLongValuePage ( ) const } +// ================================================================ +INLINE BOOL CPAGE::FBBTBuffRootPage() const +// ================================================================ +{ + if ( FFlags() & fPageBBTBuffRoot ) + { + Assert( FFlags() & fPageBBTBuff ); + } + + return FFlags() & fPageBBTBuffRoot; +} + + +// ================================================================ +INLINE BOOL CPAGE::FBBTBuffPage() const +// ================================================================ +{ + return FFlags() & fPageBBTBuff; +} + + // ================================================================ INLINE BOOL CPAGE::FNewRecordFormat ( ) const // ================================================================ @@ -1613,7 +1642,7 @@ INLINE void CPAGE::SetITagState_( INT itagMicFree, INT ctagReserved ) bool fResvTagFormatEnabled = FResvTagFormatEnabled(); if ( fResvTagFormatEnabled || ( ppghdr->itagState >> PGHDR::SHF_CTAG_RESERVED ) > 0 ) { - Assert( m_ifmp == ifmpNil || fResvTagFormatEnabled ); // UA_TODO: can this trigger if a build is rolled back? + Assert( m_ifmp == ifmpNil || fResvTagFormatEnabled ); // can this trigger if a build is rolled back? 
ppghdr->itagState = USHORT( ( ctagReserved << PGHDR::SHF_CTAG_RESERVED ) | itagMicFree ); } else diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 06d33f97..4a173991 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -1169,6 +1169,8 @@ class DATA VOID DeltaCb ( INT i ); VOID Nullify (); + std::string ToString(); + #ifdef DEBUG public: DATA (); @@ -1261,6 +1263,29 @@ INLINE VOID DATA::Nullify() } +// ================================================================ +INLINE std::string DATA::ToString() +// ================================================================ +{ + std::string str; + str.reserve( Cb() * 3 ); + char hex[ 4 ]; + BYTE* pb = (BYTE*) Pv(); + + for ( int i = 0; i < Cb(); i++ ) + { + sprintf_s( hex, "%02x ", pb[ i ] ); + str.append( hex ); + } + + return str; + + // Force includes the function even if there are no calls to it. + // This allows the function to be available for debugging in VS. +#pragma comment(linker, "/include:" __FUNCDNAME__) +} + + #ifdef DEBUG @@ -1345,6 +1370,8 @@ class KEY VOID Advance ( INT cb ); VOID Nullify (); + std::string ToString(); + #ifdef DEBUG public: VOID Invalidate (); @@ -1438,6 +1465,21 @@ INLINE VOID KEY::Nullify() } +// ================================================================ +INLINE std::string KEY::ToString() +// ================================================================ +{ + std::string str = prefix.ToString(); + str += '.'; + str += suffix.ToString(); + return str; + + // Force includes the function even if there are no calls to it. + // This allows the function to be available for debugging in VS. 
+#pragma comment(linker, "/include:" __FUNCDNAME__) +} + + // ================================================================ INLINE USHORT KEY::CbLimitKeyMost( const USHORT usT ) // ================================================================ diff --git a/dev/ese/src/inc/esestd.hxx b/dev/ese/src/inc/esestd.hxx index 92b009c7..b4cba748 100644 --- a/dev/ese/src/inc/esestd.hxx +++ b/dev/ese/src/inc/esestd.hxx @@ -110,6 +110,7 @@ using namespace std; #include "idb.hxx" #include "callback.hxx" #include "fcb.hxx" +#include "bbtbuff.hxx" #include "fucb.hxx" #include "scb.hxx" #include "tdb.hxx" diff --git a/dev/ese/src/inc/jettest.hxx b/dev/ese/src/inc/jettest.hxx index 92c623dc..8c0adf55 100644 --- a/dev/ese/src/inc/jettest.hxx +++ b/dev/ese/src/inc/jettest.hxx @@ -238,6 +238,7 @@ class JetTestFixture protected: JetTestFixture(); + JetTestFixture( JetUnitTestResult* presult ) : m_presult( presult ) {} virtual ~JetTestFixture(); void Fail_( const char * const szFile, const INT line, const char * const szCondition ); @@ -245,7 +246,7 @@ class JetTestFixture virtual bool SetUp_() = 0; virtual void TearDown_() = 0; - private: + protected: JetUnitTestResult * m_presult; }; diff --git a/dev/ese/src/inc/node.hxx b/dev/ese/src/inc/node.hxx index 0a5ac981..12ff901f 100644 --- a/dev/ese/src/inc/node.hxx +++ b/dev/ese/src/inc/node.hxx @@ -43,11 +43,12 @@ enum NodeRootField { PERSISTED enum NodeResvTagId : BYTE { - rtidInvalid = 0, - rtidMax = 0x1f // tag Ids are byte values, but we only support a max of 7 reserved tags. - // Reserving 3 high bits to keep it similar to NodeFlags on the first byte of a regular iline. - // This isn't needed because reserved tags don't have node flags. - // Keeping these bits free just in case. They can be reclaimed in the future. + rtidInvalid = 0, + rtidBBTBuff = 1, + rtidMax = 0x1f // tag Ids are byte values, but we only support a max of 7 reserved tags. 
+ // Reserving 3 high bits to keep it similar to NodeFlags on the first byte of a regular iline. + // This isn't needed because reserved tags don't have node flags. + // Keeping these bits free just in case. They can be reclaimed in the future. }; PERSISTED diff --git a/dev/ese/src/noncore/interop/exceptions.h b/dev/ese/src/noncore/interop/exceptions.h index 4f6537b4..15902d91 100644 --- a/dev/ese/src/noncore/interop/exceptions.h +++ b/dev/ese/src/noncore/interop/exceptions.h @@ -1007,6 +1007,54 @@ namespace Isam }; + [Serializable] + public ref class IsamBBTNodeCorruptedException : public IsamCorruptionException + { + public: + IsamBBTNodeCorruptedException() : IsamCorruptionException( "A property of the BBT node is logically corrupted. Or the BBT node isn't valid.", JET_errBBTNodeCorrupted) + { + } + + // Constructor with embedded exception. Does not use the string from esent.h. + IsamBBTNodeCorruptedException( String ^ description, Exception^ innerException ) : + IsamCorruptionException( description, innerException ) + { + } + + IsamBBTNodeCorruptedException( + System::Runtime::Serialization::SerializationInfo^ info, + System::Runtime::Serialization::StreamingContext context + ) + : IsamCorruptionException( info, context ) + { + } + + }; + + [Serializable] + public ref class IsamBBTBuffCorruptedException : public IsamCorruptionException + { + public: + IsamBBTBuffCorruptedException() : IsamCorruptionException( "A BBT buff is logically corrupted. The nodes are out of sequence or the BBT header is corrupt.", JET_errBBTBuffCorrupted) + { + } + + // Constructor with embedded exception. Does not use the string from esent.h. 
+ IsamBBTBuffCorruptedException( String ^ description, Exception^ innerException ) : + IsamCorruptionException( description, innerException ) + { + } + + IsamBBTBuffCorruptedException( + System::Runtime::Serialization::SerializationInfo^ info, + System::Runtime::Serialization::StreamingContext context + ) + : IsamCorruptionException( info, context ) + { + } + + }; + [Serializable] public ref class IsamCannotSeparateIntrinsicLVException : public IsamUsageException { @@ -9219,6 +9267,10 @@ static IsamErrorException^ JetErrToException( const JET_ERR err ) return gcnew IsamPageTagCorruptedException; case JET_errNodeCorrupted: return gcnew IsamNodeCorruptedException; + case JET_errBBTNodeCorrupted: + return gcnew IsamBBTNodeCorruptedException; + case JET_errBBTBuffCorrupted: + return gcnew IsamBBTBuffCorruptedException; case JET_errCannotSeparateIntrinsicLV: return gcnew IsamCannotSeparateIntrinsicLVException; case JET_errSeparatedLongValue: diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index 277bc378..b637f0e8 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -371,6 +371,7 @@ DEBUG_EXT( EDBGSetImplicitDB ); DEBUG_EXT( EDBGSetImplicitInst ); DEBUG_EXT( EDBGSetImplicitBT ); DEBUG_EXT( EDBGSetPii ); +DEBUG_EXT( EDBGDumpBBTBuff ); extern VOID DBUTLDumpRec( const LONG cbPage, const FUCB * const pfucbTable, const VOID * const pv, const INT cb, CPRINTF * pcprintf, const INT cbWidth ); @@ -495,6 +496,10 @@ LOCAL const EDBGFUNCMAP rgfuncmap[] = { "DUMP", EDBGDump, "DUMP <class> <address> - Dump an ESE structure at the given address" }, +{ + "DUMPBBTBUFF", EDBGDumpBBTBuff, + "DUMPBBTBUFF <pBBTBuff> [<level>] - Dumps nodes in a loaded BBT Buff. All nodes <=level are dumped." 
+}, { "DUMPCACHEINFO", EDBGDumpCacheInfo, "DUMPCACHEINFO [<szTable>] [<ifmp|.>] - Dumps info on pages cached in the buffer manager" @@ -16820,6 +16825,129 @@ DEBUG_EXT( EDBGHelpDump ) } +// ================================================================ +LOCAL ERR ErrEDBGDumpBBTBuff_( BBTBuff* pBBTBuffDebuggee, INT level ) +// ================================================================ +{ + ERR err = JET_errSuccess; + BBTBuff* pBBTBuff = NULL; + CSR* pcsrBase = NULL; + CSR* rgcsr = NULL; + BBTBuffFormat rgFormat[ sizeof( BBTBUFF_FORMAT_CONSTANTS ) ]; + CSRStackArray rgcsrNew; + + if ( !FFetchVariable( pBBTBuffDebuggee, &pBBTBuff ) ) + { + dprintf( "Error: Failed to fetch BBT Buff\n" ); + Error( ErrERRCheck( JET_errInternalError ) ); + } + + // Fetch pages + if ( !FReadGlobal( "BBTBUFF_FORMAT_CONSTANTS", &rgFormat ) ) + { + dprintf( "Error: Failed to get BBTBuff format constants.\n" ); + Error( ErrERRCheck( JET_errInternalError ) ); + } + + if ( !FFetchVariable( pBBTBuff->m_pcsrBase, &pcsrBase ) || + !FFetchVariable( pBBTBuff->m_rgcsrLatched, &rgcsr, pBBTBuff->m_cMaxPages - 1 ) ) + { + dprintf( "Error: Failed to fetch BBT CSRs\n" ); + Error( ErrERRCheck( JET_errInternalError ) ); + } + + rgcsrNew = CSRStackArray( _alloca( sizeof( CSR ) * pBBTBuff->m_cMaxPages ), pBBTBuff->m_cMaxPages, false ); // don't release latches + ULONG cbPage = rgFormat[ pBBTBuff->m_ifmt ].cbCPAGE; + dprintf( "Detected page size: %u\n", cbPage ); + + for ( int i = 0; i < rgcsrNew.CItems(); i++ ) + { + CSR* pcsrCurr = ( i == 0 ? pcsrBase : &rgcsr[ i - 1 ] ); + IFMP ifmp = pcsrCurr->Cpage().Ifmp(); + PGNO pgno = pcsrCurr->Pgno(); // must use CSR::m_pgno, CPAGE::PgnoThis() gets it off of the pghdr, which isn't available yet + BYTE* rgbPage; + BYTE* rgbDebuggee = (BYTE*) ( i == 0 ? 
pcsrBase->Cpage().PvBuffer() : rgcsr[ i - 1 ].Cpage().PvBuffer() ); + Call( FFetchAlignedVariable( rgbDebuggee, &rgbPage, cbPage ) ); + rgcsrNew[ i ].LoadDehydratedPage( ifmp, pgno, rgbPage, cbPage, cbPage ); + } + + { + LINE line; + BBTBuff bbtBuffNew; + BBTBuff::GetBBTBuffRoot( rgcsrNew[ 0 ], &line ); + BBTBuffHeader* pbbtHeader = BBTBuff::PBBTHeader( line ); + bbtBuffNew.Load( NULL, ifmpNil, &rgcsrNew[ 0 ], CSRHeapArray( rgcsrNew.Subarray( 1 ) ), pbbtHeader, latchReadNoTouch ); + + if ( pBBTBuff->m_pnodeCurr != NULL ) + { + // Translate currency + int ipgCurr = pBBTBuff->m_ipgCurr; + BYTE* rgbDebuggee = (BYTE*) ( ipgCurr == 0 ? pcsrBase->Cpage().PvBuffer() : rgcsr[ ipgCurr - 1 ].Cpage().PvBuffer() ); + auto ibOnPage = ( (BYTE*) pBBTBuff->m_pnodeCurr ) - rgbDebuggee; + if ( ibOnPage > bbtBuffNew.IbPageDataEnd( ipgCurr ) ) + { + dprintf( "Error: Can't figure out currency\n" ); + Error( ErrERRCheck( JET_errInternalError ) ); + } + + SkipListLink linkCurr = bbtBuffNew.LinkFromIpgOffset( ipgCurr, (int) ibOnPage ); + Call( bbtBuffNew.ErrSetCurrNodeFromLink( linkCurr ) ); + } + + std::string szDump = DumpBBTBuff( bbtBuffNew, (INT) level ); + dprintf( "%s", szDump.c_str() ); + } + +HandleError: + rgcsrNew.ForEach( []( CSR& csr ) + { + UnfetchAligned( csr.Cpage().PvBuffer() ); + } ); + + Unfetch( rgcsr ); + Unfetch( pcsrBase ); + Unfetch( pBBTBuff ); + return err; +} + +// ================================================================ +DEBUG_EXT( EDBGDumpBBTBuff ) +// ================================================================ +{ + BBTBuff* pBBTBuffDebuggee = NULL; + ULONG level = 0; + + auto printHelp = []() + { + // invalid usage + // + dprintf( "Usage: DUMPBBTBUFF []\n" ); + dprintf( " is the address of a loaded BBTBuff object\n" ); + dprintf( " is an integer between 0 - 15. 
Any nodes with the skiplist level <= to the given level will be dumped\n" ); + dprintf( " 0 dumps all nodes.\n" ); + }; + + if ( argc < 1 || + argc > 2 || + !FAddressFromSz( argv[ 0 ], &pBBTBuffDebuggee ) ) + { + printHelp(); + return; + } + + if ( argc == 2 ) + { + if ( !FUlFromSz( argv[ 1 ], &level, 10 ) ) + { + printHelp(); + return; + } + } + + (void) ErrEDBGDumpBBTBuff_( pBBTBuffDebuggee, (INT) level ); +} + + // ================================================================ DEBUG_EXT( EDBGDump ) // ================================================================ @@ -17175,6 +17303,24 @@ const CHAR * const mpdbstatesz[ JET_dbstateDirtyAndPatchedShutdown + 1 ] = "JET_dbstateDirtyAndPatchedShutdown", }; +// ================================================================ +VOID CSR::LoadDehydratedPage( const IFMP ifmp, const PGNO pgno, VOID* const pv, const ULONG cb, const ULONG cbPage ) +// ================================================================ +{ + ASSERT_VALID( this ); + Assert( m_latch == latchNone ); + + m_cpage.LoadDehydratedPage( ifmp, pgno, pv, cb, cbPage ); + + // set members + m_pgno = pgno; + m_dbtimeSeen = m_cpage.Dbtime(); + m_latch = latchReadNoTouch; + m_pagetrimState = pagetrimNormal; + + Assert( m_dbtimeSeen == m_cpage.Dbtime() ); +} + // ================================================================ VOID CSR::Dump( CPRINTF * pcprintf, DWORD_PTR dwOffset ) const // ================================================================ From a8a66021353974a77970a5756dcad4d1fea9fe91 Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Sat, 12 Nov 2022 16:56:13 +0000 Subject: [PATCH 099/102] Refactor uses of CArray to use ErrAppendEntry(), FRemoveLastEntry() and Clear() whenever possible. This change adds three new methods to CArray: - ERR ErrAppendEntry( const CEntry& entry ); - bool FRemoveLastEntry(); - void Clear(); It also replaces common patterns throughout the code with these new methods. 
[Substrate:76ec4ee374b60a66a56f88b94496417079ce3721] --- dev/ese/published/inc/collection.hxx | 39 +++++++++ dev/ese/src/ese/_log/logread.cxx | 3 +- dev/ese/src/ese/_log/logredo.cxx | 6 +- dev/ese/src/ese/bf.cxx | 2 +- dev/ese/src/ese/cat.cxx | 20 ++--- dev/ese/src/ese/dbscan.cxx | 2 +- dev/ese/src/ese/repair.cxx | 2 +- dev/ese/src/ese/revertsnapshot.cxx | 18 +--- dev/ese/src/ese/space.cxx | 18 ++-- dev/ese/src/inc/pib.hxx | 2 +- .../src/os/blockcache/_hashedlrukcache.hxx | 20 ++--- .../_hashedlrukcachedfiletableentry.hxx | 18 ++-- dev/ese/src/os/edbg.cxx | 2 +- dev/ese/src/os/osfile.cxx | 6 +- .../collection/collectionunit/array.cxx | 50 ++++++++++- .../oslayer/oslayerunit/sparsefiletest.cxx | 82 +++++++++---------- 16 files changed, 175 insertions(+), 115 deletions(-) diff --git a/dev/ese/published/inc/collection.hxx b/dev/ese/published/inc/collection.hxx index 47cec5ec..c9977961 100644 --- a/dev/ese/published/inc/collection.hxx +++ b/dev/ese/published/inc/collection.hxx @@ -4451,6 +4451,9 @@ class CArray void SetEntryDefault( const CEntry& entry ); ERR ErrSetEntry( const size_t ientry, const CEntry& entry ); + ERR ErrAppendEntry( const CEntry& entry ); + bool FRemoveLastEntry(); + void Clear(); void SetEntry( const CEntry* const pentry, const CEntry& entry ); ERR ErrLoadEntries( const BYTE* const rgbData, const size_t cbData ); @@ -4687,6 +4690,42 @@ ErrSetEntry( const size_t ientry, const CEntry& entry ) return ERR::errSuccess; } +// grows the array by one element and sets that element to the value provided + +template< class CEntry > +inline typename CArray< CEntry >::ERR CArray< CEntry >:: +ErrAppendEntry( const CEntry& entry ) +{ + return ErrSetEntry( Size(), entry ); +} + +// removes the last entry of the array, returns false IFF the array is already empty + +template< class CEntry > +inline bool CArray< CEntry >:: +FRemoveLastEntry() +{ + if ( Size() == 0 ) + { + return false; + } + + const ERR err = ErrSetSize( Size() - 1 ); + COLLAssert( err == 
ERR::errSuccess ); + + return true; +} + +// sets the array size to zero + +template< class CEntry > +inline void CArray< CEntry >:: +Clear() +{ + const ERR err = ErrSetSize( 0 ); + COLLAssert( err == ERR::errSuccess ); +} + // sets an existing entry of the array. WARNING: the array size and capacity // are not adjusted accordingly, so this method is only supposed to be used // to set existing elements. diff --git a/dev/ese/src/ese/_log/logread.cxx b/dev/ese/src/ese/_log/logread.cxx index edd61c37..1a4103c8 100644 --- a/dev/ese/src/ese/_log/logread.cxx +++ b/dev/ese/src/ese/_log/logread.cxx @@ -623,8 +623,7 @@ VOID LogPrereaderBase::LGPDBEnable( const DBID dbid ) // Otherwise, we need to allocate all the data structures. if ( FLGPDBEnabled( dbid ) ) { - const CArray::ERR errSetSize = m_rgArrayPagerefs[ dbid ].ErrSetSize( 0 ); - Assert( errSetSize == CArray::ERR::errSuccess ); + m_rgArrayPagerefs[ dbid ].Clear(); Assert( FLGPDBEnabled( dbid ) ); } else diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index f4a56d83..1f0e84aa 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -10116,8 +10116,7 @@ ERR LOG::ErrLGRIRedoScanCheck( const LRSCANCHECK2 * const plrscancheck, BOOL* co (void)m_arrayPagerefSupercold.ErrSetCapacity( LNextPowerOf2( m_arrayPagerefSupercold.Size() + 1 ) ); } - (void)m_arrayPagerefSupercold.ErrSetEntry( m_arrayPagerefSupercold.Size(), - PageRef( plrscancheck->Dbid(), plrscancheck->Pgno() ) ); + (void)m_arrayPagerefSupercold.ErrAppendEntry( PageRef( plrscancheck->Dbid(), plrscancheck->Pgno() ) ); } // Ideally, we should check for the error returned when latching the page to filter out cases where @@ -12120,8 +12119,7 @@ ERR LOG::ErrLGRIRedoOperations( } } - const CArray::ERR errArray = m_arrayPagerefSupercold.ErrSetSize( 0 ); - Assert( errArray == CArray::ERR::errSuccess ); + m_arrayPagerefSupercold.Clear(); // we report the progress either if this log took too long to 
replay (at least 5 seconds) or // if the control callback says so ... diff --git a/dev/ese/src/ese/bf.cxx b/dev/ese/src/ese/bf.cxx index 79edf743..4a1f8dcd 100644 --- a/dev/ese/src/ese/bf.cxx +++ b/dev/ese/src/ese/bf.cxx @@ -5255,7 +5255,7 @@ void BFIBuildReferencedPageListForCrashDump( CReferencedPages * ptableReferenced // means that the page won't be included // in our list of referenced pages) - (void) arrayReferencedPages.ErrSetEntry( arrayReferencedPages.Size(), pagepointer ); + (void) arrayReferencedPages.ErrAppendEntry( pagepointer ); } } } diff --git a/dev/ese/src/ese/cat.cxx b/dev/ese/src/ese/cat.cxx index 91748af0..8b63edf1 100644 --- a/dev/ese/src/ese/cat.cxx +++ b/dev/ese/src/ese/cat.cxx @@ -16638,7 +16638,7 @@ ERR ErrCATIAccumulateIndexLocales( { // no entry for this LocaleName + version, add one ... li.m_cIndices = 1; - CLocaleNameInfoArray::ERR errArray = parrayLocales->ErrSetEntry( parrayLocales->Size(), li ); + CLocaleNameInfoArray::ERR errArray = parrayLocales->ErrAppendEntry( li ); if ( CLocaleNameInfoArray::ERR::errSuccess != errArray ) { Assert( CLocaleNameInfoArray::ERR::errOutOfMemory == errArray ); @@ -16829,16 +16829,16 @@ JETUNITTEST( CATMSysLocales, TestCLocaleInfoArrayWillWorkAsRequiredForMSysLocale // insert 4 imaginary records CHECK( 0 == localesarray.Size() ); - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleInfoArray::ERR::errSuccess ); li.m_lcid = 1040; - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleInfoArray::ERR::errSuccess ); li.m_qwVersion = 0x45; - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleInfoArray::ERR::errSuccess ); li.m_lcid = 1046; - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleInfoArray::ERR::errSuccess ); // 
check that the by offset/iEntry all work to retrieve expected results. @@ -16910,23 +16910,23 @@ JETUNITTEST( CATMSysLocales, TestCLocaleNameInfoArrayWillWorkAsRequiredForMSysLo // insert 5 imaginary records CHECK( 0 == localesarray.Size() ); - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-br" ); - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); li.m_qwVersion = 0x45; - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); OSStrCbCopyW( li.m_wszLocaleName, sizeof( li.m_wszLocaleName ), L"pt-pt" ); - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); li.m_sortID.Data1++; - err = localesarray.ErrSetEntry( localesarray.Size(), li ); + err = localesarray.ErrAppendEntry( li ); CHECK( err == CLocaleNameInfoArray::ERR::errSuccess ); li.m_sortID.Data1--; diff --git a/dev/ese/src/ese/dbscan.cxx b/dev/ese/src/ese/dbscan.cxx index 95a4670e..ef0842a4 100644 --- a/dev/ese/src/ese/dbscan.cxx +++ b/dev/ese/src/ese/dbscan.cxx @@ -3711,7 +3711,7 @@ ERR DBMScanObserverCleanup::ErrCleanupLVPage_( CSR * const pcsr, DBMObjectCache* } LvId lid; LidFromKey( &lid, kdf.key ); - CArray::ERR errT = arrLid.ErrSetEntry( arrLid.Size(), lid ); + CArray::ERR errT = arrLid.ErrAppendEntry( lid ); Assert( errT == CArray::ERR::errSuccess ); } } diff --git a/dev/ese/src/ese/repair.cxx b/dev/ese/src/ese/repair.cxx index f744915e..5f40e69f 100644 --- a/dev/ese/src/ese/repair.cxx +++ b/dev/ese/src/ese/repair.cxx @@ -184,7 +184,7 @@ class PgnoCollection : private CArray< PGNO > { m_rwl.EnterAsWriter(); - if ( 
ErrSetEntry( Size(), pgno ) != CArray< PGNO >::ERR::errSuccess ) + if ( ErrAppendEntry( pgno ) != CArray< PGNO >::ERR::errSuccess ) { m_rwl.LeaveAsWriter(); return ErrERRCheck( JET_errOutOfMemory ); diff --git a/dev/ese/src/ese/revertsnapshot.cxx b/dev/ese/src/ese/revertsnapshot.cxx index ed4fae8b..71df6d2d 100644 --- a/dev/ese/src/ese/revertsnapshot.cxx +++ b/dev/ese/src/ese/revertsnapshot.cxx @@ -4584,7 +4584,7 @@ ERR CRBSDatabaseRevertContext::ErrAddRootPageRecord( const BOOL fDeleteOperation // Only add entry to root page records if it doesn't exist already if ( m_rgrootpagerec->SearchLinear( rootpagerec, CRBSDatabaseRevertContext::ICRBSDatabaseRootPageRecordEquals ) == CArray< CRootPageRecord >::iEntryNotFound ) { - errArray = m_rgrootpagerec->ErrSetEntry( m_rgrootpagerec->Size(), rootpagerec ); + errArray = m_rgrootpagerec->ErrAppendEntry( rootpagerec ); } if ( errArray != CArray< CRootPageRecord >::ERR::errSuccess ) @@ -4651,7 +4651,7 @@ ERR CRBSDatabaseRevertContext::ErrCapturePageFDPDeleteState( const LONG lRBSGen, Call( ErrDBDiskPageFDPRootDelete( NULL, m_rgrootpagerec->Entry( i ).PgnoDest(), fTrue, fFalse, cbDbPageSize, &fPgnoFDPRootDelete)); CPageFDPDeleteState pagefdpdeletestate( m_rgrootpagerec->Entry( i ).PgnoDest(), fPgnoFDPRootDelete); - errArray = rgpagefdpdeletestate->ErrSetEntry( rgpagefdpdeletestate->Size(), pagefdpdeletestate ); + errArray = rgpagefdpdeletestate->ErrAppendEntry( pagefdpdeletestate ); if ( errArray != CArray< CPageFDPDeleteState >::ERR::errSuccess ) { @@ -4991,14 +4991,10 @@ ERR CRBSDatabaseRevertContext::ErrRBSApplyRootPageRecords( const USHORT cbDbPage VOID CRBSDatabaseRevertContext::ResetRootPageRecords() { - CArray< CRootPageRecord >::ERR errArray = CArray< CRootPageRecord >::ERR::errSuccess; - if ( m_rgrootpagerec != NULL ) { - errArray = m_rgrootpagerec->ErrSetSize( 0 ); + m_rgrootpagerec->Clear(); } - - Assert( errArray == CArray< CRootPageRecord >::ERR::errSuccess ); } // Comparer to allow sorting of pages in our array 
to try and get sequential writes. @@ -5181,13 +5177,7 @@ ERR CRBSDatabaseRevertContext::ErrFlushDBPages( USHORT cbDbPageSize, BOOL fFlush } } - errArray = m_rgRBSDbPage->ErrSetSize( 0 ); - - if ( errArray != CArray< CPagePointer >::ERR::errSuccess ) - { - Assert( errArray == CArray< CPagePointer >::ERR::errOutOfMemory ); - Error( ErrERRCheck( JET_errOutOfMemory ) ); - } + m_rgRBSDbPage->Clear(); // This will be NULL if there is no .jfm file. if ( m_pfm ) diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index d218b3a4..0583cd59 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -9627,7 +9627,7 @@ LOCAL ERR ErrSPILRProcessObjectSpaceOwnershipSetPgnos( { Call( ErrErrArrayPgnoToJetErr( parrShelved->ErrSetCapacity( 2 * ( parrShelved->Size() + 1 ) ) ) ); } - Call( ErrErrArrayPgnoToJetErr( parrShelved->ErrSetEntry( parrShelved->Size(), (CPgnoFlagged)pgno ) ) ); + Call( ErrErrArrayPgnoToJetErr( parrShelved->ErrAppendEntry( (CPgnoFlagged)pgno ) ) ); } else { @@ -11436,9 +11436,7 @@ LOCAL ERR ErrSPIAddSecondaryExtent( const EXTENTINFO& extinfoReleased = parreiReleased->Entry( parreiReleased->Size() - 1 ); Assert( extinfoReleased.FValid() && ( extinfoReleased.CpgExtent() > 0 ) ); Call( ErrSPIAEFreeExt( pfucb, extinfoReleased.PgnoFirst(), extinfoReleased.CpgExtent() ) ); - CallS( ( parreiReleased->ErrSetSize( parreiReleased->Size() - 1 ) == CArray::ERR::errSuccess ) ? - JET_errSuccess : - ErrERRCheck( JET_errOutOfMemory ) ); + (void)parreiReleased->FRemoveLastEntry(); } Assert( !Pcsr( pfucbAE )->FLatched() ); @@ -11926,7 +11924,7 @@ LOCAL ERR ErrSPIExtendDB( EXTENTINFO extinfoReleased; extinfoReleased.pgnoLastInExtent = pgnoSELastAdj + cpgAvail; extinfoReleased.cpgExtent = cpgAvail; - Call( ( parreiReleased->ErrSetEntry( parreiReleased->Size(), extinfoReleased ) == CArray::ERR::errSuccess ) ? + Call( ( parreiReleased->ErrAppendEntry( extinfoReleased ) == CArray::ERR::errSuccess ) ? 
JET_errSuccess : ErrERRCheck( JET_errOutOfMemory ) ); } @@ -13241,9 +13239,9 @@ LOCAL ERR ErrSPIReserveSPBufPagesForSpaceTree( // Make sure we have enough room in the array. if ( parreiReleased != NULL ) { - Call( ( parreiReleased->ErrSetEntry( parreiReleased->Size(), extinfoReleased ) == CArray::ERR::errSuccess ) ? - JET_errSuccess : - ErrERRCheck( JET_errOutOfMemory ) ); + Call( ( parreiReleased->ErrAppendEntry( extinfoReleased ) == CArray::ERR::errSuccess ) ? + JET_errSuccess : + ErrERRCheck( JET_errOutOfMemory ) ); } else { @@ -13483,9 +13481,7 @@ LOCAL ERR ErrSPIReserveSPBufPages( const EXTENTINFO& extinfoReleased = arreiReleased[ arreiReleased.Size() - 1 ]; Assert( extinfoReleased.FValid() && ( extinfoReleased.CpgExtent() > 0 ) ); Call( ErrSPIAEFreeExt( pfucb, extinfoReleased.PgnoFirst(), extinfoReleased.CpgExtent(), pfucbParentLocal ) ); - CallS( ( arreiReleased.ErrSetSize( arreiReleased.Size() - 1 ) == CArray::ERR::errSuccess ) ? - JET_errSuccess : - ErrERRCheck( JET_errOutOfMemory ) ); + (void)arreiReleased.FRemoveLastEntry(); fNeedRefill = fTrue; } } diff --git a/dev/ese/src/inc/pib.hxx b/dev/ese/src/inc/pib.hxx index 2b12a677..557be181 100644 --- a/dev/ese/src/inc/pib.hxx +++ b/dev/ese/src/inc/pib.hxx @@ -872,7 +872,7 @@ INLINE ERR PIB::MACRO::ErrInsertPgnoFreed( IFMP ifmp, const PGNO pgnoFreed ) Alloc( m_rgfmppgnoFreed = new CArray< CFMPPage >() ); } - errArray = m_rgfmppgnoFreed->ErrSetEntry( m_rgfmppgnoFreed->Size(), fmppage ); + errArray = m_rgfmppgnoFreed->ErrAppendEntry( fmppage ); if ( errArray != CArray< CFMPPage >::ERR::errSuccess ) { diff --git a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx index b3f89ec4..26882c46 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcache.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcache.hxx @@ -868,9 +868,7 @@ class THashedLRUKCache if ( !fMatched ) { - Call( ErrToErr>( m_arrayClusterSwap.ErrSetEntry( m_arrayClusterSwap.Size(), - CClusterSwap( 
slotstAccepted.Clno(), - slotstCurrent.Clno() ) ) ) ); + Call( ErrToErr>( m_arrayClusterSwap.ErrAppendEntry( CClusterSwap( slotstAccepted.Clno(), slotstCurrent.Clno() ) ) ) ); } } @@ -889,15 +887,13 @@ class THashedLRUKCache { // NOTE: only evicted slots can be immediately reused so these are all evicts - Call( ErrToErr>( m_arrayCachedBlockUpdate.ErrSetEntry( m_arrayCachedBlockUpdate.Size(), - CCachedBlockUpdate( slotstAccepted ) ) ) ); + Call( ErrToErr>( m_arrayCachedBlockUpdate.ErrAppendEntry( CCachedBlockUpdate( slotstAccepted ) ) ) ); } } // accumulate the updated slot for the journal entry - Call( ErrToErr>( m_arrayCachedBlockUpdate.ErrSetEntry( m_arrayCachedBlockUpdate.Size(), - CCachedBlockUpdate( slotstCurrent ) ) ) ); + Call( ErrToErr>( m_arrayCachedBlockUpdate.ErrAppendEntry( CCachedBlockUpdate( slotstCurrent ) ) ) ); HandleError: return err; @@ -7778,7 +7774,7 @@ void THashedLRUKCache::PerformOpportunisticSlabWriteBacks() // collect this slab for write back - Call( ErrToErr>( arrayIbSlab.ErrSetEntry( arrayIbSlab.Size(), pswbT->IbSlab() ) ) ); + Call( ErrToErr>( arrayIbSlab.ErrAppendEntry( pswbT->IbSlab() ) ) ); // remember the min of the youngest journal positions affecting any of the slabs to write back @@ -7942,7 +7938,7 @@ ERR THashedLRUKCache::ErrFlushAllState( _In_ const JournalPosition jposDura // get a list of all dirty slabs Call( ErrToErr>( arrayIbSlab.ErrSetCapacity( m_ilSlabsToWriteBackByJposMin.Count() ) ) ); - CallS( ErrToErr>( arrayIbSlab.ErrSetSize( 0 ) ) ); + arrayIbSlab.Clear(); m_critSlabsToWriteBack.Enter(); fListLocked = fTrue; @@ -7960,7 +7956,7 @@ ERR THashedLRUKCache::ErrFlushAllState( _In_ const JournalPosition jposDura // collect the slab for write back - Call( ErrToErr>( arrayIbSlab.ErrSetEntry( arrayIbSlab.Size(), pswb->IbSlab() ) ) ); + Call( ErrToErr>( arrayIbSlab.ErrAppendEntry( pswb->IbSlab() ) ) ); } m_critSlabsToWriteBack.Leave(); @@ -9963,7 +9959,7 @@ ERR THashedLRUKCache::ErrSuspendThreadFromStateAccess( _In_ const 
CMeteredS fLeave = fTrue; CArray*>* const parray = &m_rgarraySuspendedThreads[ group ]; - Call( ErrToErr*>>( parray->ErrSetEntry( parray->Size(), pctls ) ) ); + Call( ErrToErr*>>( parray->ErrAppendEntry( pctls ) ) ); m_critSuspendedThreads.Leave(); fLeave = fFalse; @@ -10004,7 +10000,7 @@ void THashedLRUKCache::ResumeStateAccess() // reset the suspended threads for the next cycle CArray*>* const parray = &m_rgarraySuspendedThreads[ m_msStateAccess.GroupInactive() ]; - CallS( ErrToErr*>>( parray->ErrSetSize( 0 ) ) ); + parray->Clear(); // allow state access to continue diff --git a/dev/ese/src/os/blockcache/_hashedlrukcachedfiletableentry.hxx b/dev/ese/src/os/blockcache/_hashedlrukcachedfiletableentry.hxx index 2eb4cec6..9191096c 100644 --- a/dev/ese/src/os/blockcache/_hashedlrukcachedfiletableentry.hxx +++ b/dev/ese/src/os/blockcache/_hashedlrukcachedfiletableentry.hxx @@ -294,7 +294,7 @@ ERR CHashedLRUKCachedFileTableEntry::ErrUpdateSparseMapForTrim( _In_ const CO for ( i = 0; i < m_arraysparseseg.Size() && m_arraysparseseg[ i ].ibLast < invalidate.IbStart(); i++ ) { - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), m_arraysparseseg[ i ] ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( m_arraysparseseg[ i ] ) ) ); } sparsesegNew.ibFirst = ( i < m_arraysparseseg.Size() && @@ -313,13 +313,13 @@ ERR CHashedLRUKCachedFileTableEntry::ErrUpdateSparseMapForTrim( _In_ const CO m_arraysparseseg[ i ].ibLast : invalidate.IbEnd(); - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparsesegNew ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparsesegNew ) ) ); for ( ; i < m_arraysparseseg.Size(); i++ ) { if ( invalidate.IbEnd() < m_arraysparseseg[ i ].ibFirst ) { - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), m_arraysparseseg[ i ] ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( m_arraysparseseg[ i ] ) ) ); } } @@ -350,11 +350,11 @@ ERR 
CHashedLRUKCachedFileTableEntry::ErrUpdateSparseMapForWrite( _In_ const C if ( sparseseg.ibLast < write.IbStart() ) { - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparseseg ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparseseg ) ) ); } else if ( write.IbEnd() < sparseseg.ibFirst ) { - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparseseg ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparseseg ) ) ); } else if ( write.IbStart() <= sparseseg.ibFirst && sparseseg.ibLast <= write.IbEnd() ) { @@ -363,23 +363,23 @@ ERR CHashedLRUKCachedFileTableEntry::ErrUpdateSparseMapForWrite( _In_ const C { sparsesegNew.ibFirst = sparseseg.ibFirst; sparsesegNew.ibLast = write.IbStart() - 1; - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparsesegNew ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparsesegNew ) ) ); sparsesegNew.ibFirst = write.IbEnd() + 1; sparsesegNew.ibLast = sparseseg.ibLast; - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparsesegNew ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparsesegNew ) ) ); } else if ( write.IbStart() < sparseseg.ibLast && write.IbEnd() >= sparseseg.ibLast ) { sparsesegNew.ibFirst = sparseseg.ibFirst; sparsesegNew.ibLast = write.IbStart() - 1; - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparsesegNew ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparsesegNew ) ) ); } else if ( write.IbStart() < sparseseg.ibFirst && write.IbEnd() >= sparseseg.ibFirst ) { sparsesegNew.ibFirst = write.IbEnd() + 1; sparsesegNew.ibLast = sparseseg.ibLast; - Call( ErrToErr>( arraysparseseg.ErrSetEntry( arraysparseseg.Size(), sparsesegNew ) ) ); + Call( ErrToErr>( arraysparseseg.ErrAppendEntry( sparsesegNew ) ) ); } } diff --git a/dev/ese/src/os/edbg.cxx b/dev/ese/src/os/edbg.cxx index b637f0e8..1f5f8819 100644 --- a/dev/ese/src/os/edbg.cxx +++ b/dev/ese/src/os/edbg.cxx @@ -13355,7 +13355,7 
@@ ERR ErrPopulateUsageArray(CArray * rgUsage, const CRedBlackTreeNodeKey(), pnode->Data() ); - if ( rgUsage->ErrSetEntry( rgUsage->Size(), entryToAdd ) != CArray::ERR::errSuccess ) + if ( rgUsage->ErrAppendEntry( entryToAdd ) != CArray::ERR::errSuccess ) { Error( ErrERRCheck( JET_errOutOfMemory ) ); } diff --git a/dev/ese/src/os/osfile.cxx b/dev/ese/src/os/osfile.cxx index f299b3a5..57c8340c 100644 --- a/dev/ese/src/os/osfile.cxx +++ b/dev/ese/src/os/osfile.cxx @@ -1627,9 +1627,9 @@ ERR ErrIORetrieveSparseSegmentsInRegion( IFileAPI* const sparseseg.ibLast = min( ibAlloc - 1, ibLast ); } - Call( ( parrsparseseg->ErrSetEntry( parrsparseseg->Size(), sparseseg ) == CArray::ERR::errSuccess ) ? - JET_errSuccess : - ErrERRCheck( JET_errOutOfMemory ) ); + Call( ( parrsparseseg->ErrAppendEntry( sparseseg ) == CArray::ERR::errSuccess ) ? + JET_errSuccess : + ErrERRCheck( JET_errOutOfMemory ) ); } else { diff --git a/test/ese/src/devlibtest/collection/collectionunit/array.cxx b/test/ese/src/devlibtest/collection/collectionunit/array.cxx index 5827bbdf..21e2a2bd 100644 --- a/test/ese/src/devlibtest/collection/collectionunit/array.cxx +++ b/test/ese/src/devlibtest/collection/collectionunit/array.cxx @@ -51,7 +51,7 @@ bool FTestArraySortAndSearch( CArray& array, const INT* const rgiUnsorted, const INT iMustNotBeFound = g_defaultElement; - TestCheck( CArray::ERR::errSuccess == array.ErrSetSize( 0 ) ); + array.Clear(); TestCheck( 0 == array.Size() ); // searching on empty array @@ -63,8 +63,6 @@ bool FTestArraySortAndSearch( CArray& array, const INT* const rgiUnsorted, // inserting unsorted objects - - for ( size_t i = 0; i < ci; i++ ) { const INT iKey = rgiUnsorted[ i ]; @@ -488,9 +486,53 @@ ERR ArrayTest::ErrTest() TestCheck( g_defaultElement == arrayClone.Entry( 76 ) ); TestCheck( ( g_elementMult * 77 ) == arrayClone.Entry( 77 ) ); + // cleaning to start append tests + + array.Clear(); + TestCheck( 0 == array.Size() ); + + // append elements + + for ( size_t iElement = 0; 
iElement < 10; iElement++ ) + { + const INT iKey = (INT)( 10 * ( iElement + 1 ) ); + + TestCheck( CArray::ERR::errSuccess == array.ErrAppendEntry( iKey ) ); + TestCheck( ( iElement + 1 ) == array.Size() ); + + for ( size_t jElement = 0; jElement <= iElement; jElement++ ) + { + const INT iKeyExpected = (INT)( 10 * ( jElement + 1 ) ); + const INT iKeyActual = array[ jElement ]; + TestCheck( iKeyExpected == iKeyActual ); + } + } + + // remove last element + + TestCheck( 10 == array.Size() ); + for ( size_t iElement = 0; iElement < 11; iElement++ ) + { + const size_t cSizeBefore = array.Size(); + const bool fRemovedExpected = ( iElement < 10 ); + const bool fRemovedActual = array.FRemoveLastEntry(); + const size_t cSizeAfter = array.Size(); + + TestCheck( fRemovedExpected == fRemovedActual ); + TestCheck( fRemovedActual ? ( cSizeAfter == ( cSizeBefore - 1 ) ) : ( cSizeAfter == cSizeBefore ) ); + + for ( size_t jElement = 0; jElement < cSizeAfter; jElement++ ) + { + const INT iKeyExpected = (INT)( 10 * ( jElement + 1 ) ); + const INT iKeyActual = array[ jElement ]; + TestCheck( iKeyExpected == iKeyActual ); + } + } + TestCheck( 0 == array.Size() ); + // cleaning to start sort/search tests - TestCheck( CArray::ERR::errSuccess == array.ErrSetSize( 0 ) ); + array.Clear(); TestCheck( 0 == array.Size() ); // inserting and sorting/searching: empty diff --git a/test/ese/src/devlibtest/oslayer/oslayerunit/sparsefiletest.cxx b/test/ese/src/devlibtest/oslayer/oslayerunit/sparsefiletest.cxx index 451f36fc..15cb275b 100644 --- a/test/ese/src/devlibtest/oslayer/oslayerunit/sparsefiletest.cxx +++ b/test/ese/src/devlibtest/oslayer/oslayerunit/sparsefiletest.cxx @@ -92,55 +92,55 @@ ERR TestSparseFile::ErrTest() OSTestCheck( 0 == cbAllocated ); // Beyond EOF sparse segments only. 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, 0, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 0 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 0 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 0 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 1 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, 1000, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 0 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 1000 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1, 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 1 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 1 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1, 2, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 1 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 2 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1, 1000, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 1 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 1000 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( 
CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1000, 1000, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 1000 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 1000 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1000, 1001, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 1000 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 1001 == (*parrsparseseg)[0].ibLast ); - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1000, 2000, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 1000 == (*parrsparseseg)[0].ibFirst ); @@ -196,12 +196,12 @@ ERR TestSparseFile::ErrTest() OSTestCheck( qwOnDiskSize == qwSize ); // When the entire file is allocated, there should be no sparse segments. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 0 == parrsparseseg->Size() ); // Also check if the region does not start at the beginning. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 65536, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 0 == parrsparseseg->Size() ); @@ -223,7 +223,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( qwOnDiskSize == cbExpectedSize ); // We should only see the single 64k block. 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (128*1024) == (*parrsparseseg)[0].ibFirst ); @@ -276,80 +276,80 @@ ERR TestSparseFile::ErrTest() OSTestCheck( 0 == cbAllocated ); // It will be at offset 64k, until 192k - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 65536 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (64+128)*1024 - 1 == (*parrsparseseg)[0].ibLast ); // The sparse range is [64k,192k]. Asking for [64k,128k] and [128k,192k] should return just that range. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 65536, 131072 - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 65536 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 131072 - 1 == (*parrsparseseg)[0].ibLast ); // The sparse range is [64k,192k]. Asking for [64k,128k] and [128k,192k] should return just that range. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 131072, (192*1024) - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 131072 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 192*1024 - 1 == (*parrsparseseg)[0].ibLast ); // Query up to immediately before. 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, (64*1024) - 1, parrsparseseg ) ); OSTestCheck( 0 == parrsparseseg->Size() ); // Query up to immediately before. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 10, (64*1024) - 1, parrsparseseg ) ); OSTestCheck( 0 == parrsparseseg->Size() ); // Query up to the first byte. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, (64*1024), parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibLast ); // Query up to the second byte. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, (64*1024) + 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (64*1024) + 1 == (*parrsparseseg)[0].ibLast ); // Query up to one byte shy of the last. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, (192*1024) - 2, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (192*1024) - 2 == (*parrsparseseg)[0].ibLast ); // Query up to the last byte. 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, (192*1024) - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (192*1024) - 1 == (*parrsparseseg)[0].ibLast ); // Query up to one byte past the last. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, (192*1024), parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (192*1024) - 1 == (*parrsparseseg)[0].ibLast ); // Segment is fully contained. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, (64*1024) + 10, (192*1024) - 10, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) + 10 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( (192*1024) - 10 == (*parrsparseseg)[0].ibLast ); // Segment is partially contained. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, (64*1024) + 10, (192*1024) + 10, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( (64*1024) + 10 == (*parrsparseseg)[0].ibFirst ); @@ -371,14 +371,14 @@ ERR TestSparseFile::ErrTest() // Specifying a range that goes beyond EOF: // It believes that anything beyond EOF is 'sparse', and in a way it is: There is no // backing storage at the moment, and the FS will allocate storage for a write. 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, cbTotal - 65536, cbTotal + 65536 - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( cbTotal == (*parrsparseseg)[0].ibFirst ); OSTestCheck( cbTotal + 65536 - 1 == (*parrsparseseg)[0].ibLast ); // Truly sparse + EOF. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal + 65536 - 1, parrsparseseg ) ); OSTestCheck( 2 == parrsparseseg->Size() ); OSTestCheck( (64*1024) == (*parrsparseseg)[0].ibFirst ); @@ -387,7 +387,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( cbTotal + 65536 - 1 == (*parrsparseseg)[1].ibLast ); // Truly sparse + EOF. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, (64*1024) + 10, cbTotal + 65536 - 1, parrsparseseg ) ); OSTestCheck( 2 == parrsparseseg->Size() ); OSTestCheck( (64*1024) + 10 == (*parrsparseseg)[0].ibFirst ); @@ -408,21 +408,21 @@ ERR TestSparseFile::ErrTest() OSTestCheck( 0 == cbAllocated ); // It will be at offset ibAlmostEnd. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, ibAlmostEnd - 65536, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( ibAlmostEnd == (*parrsparseseg)[0].ibFirst ); OSTestCheck( qwSize - 1 == (*parrsparseseg)[0].ibLast ); // It will be at offset ibAlmostEnd. 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, ibAlmostEnd, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( ibAlmostEnd == (*parrsparseseg)[0].ibFirst ); OSTestCheck( qwSize - 1 == (*parrsparseseg)[0].ibLast ); // There are two different sparse regions. We should see both of them: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 131072, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 2 == parrsparseseg->Size() ); OSTestCheck( 131072 == (*parrsparseseg)[0].ibFirst ); @@ -435,21 +435,21 @@ ERR TestSparseFile::ErrTest() // // Specifying a range that goes beyond EOF will not be well defined. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, ibAlmostEnd - 65536, cbTotal + 65536 - 1, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( ibAlmostEnd == (*parrsparseseg)[0].ibFirst ); OSTestCheck( cbTotal + 65536 - 1 == (*parrsparseseg)[0].ibLast ); // Specifying unaligned ranges: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 65*1024 + 1, 100*1024, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 65*1024 + 1 == (*parrsparseseg)[0].ibFirst ); OSTestCheck( 100*1024 == (*parrsparseseg)[0].ibLast ); // Specifying unaligned start/end (but outside of the sparse region): - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 1, 200*1024, parrsparseseg ) ); OSTestCheck( 1 == parrsparseseg->Size() ); OSTestCheck( 64*1024 == 
(*parrsparseseg)[0].ibFirst ); @@ -467,7 +467,7 @@ ERR TestSparseFile::ErrTest() // Sparse = { [64k,192k], [512k,576k], [960k,1024k] }. // There are multiple regions. We should see all of them. - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 3 == parrsparseseg->Size() ); OSTestCheck( 64*1024 == (*parrsparseseg)[0].ibFirst ); @@ -478,7 +478,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( cbTotal - 1 == (*parrsparseseg)[2].ibLast ); // There are multiple regions. We should see all of them (+ EOF). - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal + 10, parrsparseseg ) ); OSTestCheck( 3 == parrsparseseg->Size() ); OSTestCheck( 64*1024 == (*parrsparseseg)[0].ibFirst ); @@ -489,7 +489,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( cbTotal + 10 == (*parrsparseseg)[2].ibLast ); // There are multiple regions. We should see all of them (start from middle of first one). 
- OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 131072, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 3 == parrsparseseg->Size() ); OSTestCheck( 131072 == (*parrsparseseg)[0].ibFirst ); @@ -500,7 +500,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( cbTotal - 1 == (*parrsparseseg)[2].ibLast ); // The query range stopping at the beginning of the last sparse region should not get confused: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 131072, ibAlmostEnd - 1, parrsparseseg ) ); OSTestCheck( 2 == parrsparseseg->Size() ); OSTestCheck( 131072 == (*parrsparseseg)[0].ibFirst ); @@ -509,7 +509,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( 576*1024 - 1 == (*parrsparseseg)[1].ibLast ); // The query range stopping in the middle of the last sparse region should not get confused: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 131072, cbTotal - 32768 - 1, parrsparseseg ) ); OSTestCheck( 3 == parrsparseseg->Size() ); OSTestCheck( 131072 == (*parrsparseseg)[0].ibFirst ); @@ -520,7 +520,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( cbTotal-32768 - 1 == (*parrsparseseg)[2].ibLast ); // The query range stopping in the middle of the sparse region in the middle of the file should not get confused: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 131072, (512+32)*1024 - 1, parrsparseseg ) ); OSTestCheck( 2 == parrsparseseg->Size() ); OSTestCheck( 131072 == (*parrsparseseg)[0].ibFirst ); @@ -529,7 +529,7 @@ ERR TestSparseFile::ErrTest() OSTestCheck( (512+32)*1024 - 1 == (*parrsparseseg)[1].ibLast ); // The query range starting at the 
beginning of an allocated region should not get confused: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, (128+64)*1024, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 2 == parrsparseseg->Size() ); OSTestCheck( 512*1024 == (*parrsparseseg)[0].ibFirst ); @@ -545,7 +545,7 @@ ERR TestSparseFile::ErrTest() // Sparse = { [0k,192k], [512k,576k], [960k,1024k] }. // The query range starting at the beginning of an allocated regionshould not get confused: - OSTestCheck( CArray::ERR::errSuccess == parrsparseseg->ErrSetSize( 0 ) ); + parrsparseseg->Clear(); OSTestCheckErr( ErrIORetrieveSparseSegmentsInRegion( pfapi, 0, cbTotal - 1, parrsparseseg ) ); OSTestCheck( 3 == parrsparseseg->Size() ); OSTestCheck( 0 == (*parrsparseseg)[0].ibFirst ); From be082a9c6d3c6f4d92c6624a8040dbd7f7b56cf3 Mon Sep 17 00:00:00 2001 From: Brett Shirley Date: Tue, 15 Nov 2022 01:44:38 +0000 Subject: [PATCH 100/102] Fix the crash for pinst contextless memory failure tags. Note: I haven't yet been able to debug this from the dump, but this is almost assuredly the issue. [Substrate:6da1bbb9fb5b2d492770026db9b858fa1a59e38f] --- dev/ese/src/ese/_osu/hapublishu.cxx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dev/ese/src/ese/_osu/hapublishu.cxx b/dev/ese/src/ese/_osu/hapublishu.cxx index 807cae40..a8237d66 100644 --- a/dev/ese/src/ese/_osu/hapublishu.cxx +++ b/dev/ese/src/ese/_osu/hapublishu.cxx @@ -149,6 +149,7 @@ void OSUHAEmitFailureTag_( } } } + #define pinst pinstDoNotUseForRestOfFuncUse_pinstActual // FUTURE: HA Publish is only for O365 datacenter, but even so this is a bit of a layering violation. 
We will // add these temporarily to do a basic health check on O365 to see if we're dropping HA FailureItems from any ESE @@ -288,18 +289,18 @@ void OSUHAEmitFailureTag_( iwsz, rgwsz ); - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedEvent ); + AtomicExchangeSet( (ULONG*)&pinstActual->m_grbitHaFailureTags, (ULONG)bitHaPublishedEvent ); if ( haTag == HaDbFailureTagCorruption ) { - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedCorruptionTag ); + AtomicExchangeSet( (ULONG*)&pinstActual->m_grbitHaFailureTags, (ULONG)bitHaPublishedCorruptionTag ); } if ( haTag == HaDbFailureTagIoHard ) { - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedIoHardTag ); + AtomicExchangeSet( (ULONG*)&pinstActual->m_grbitHaFailureTags, (ULONG)bitHaPublishedIoHardTag ); } if ( haTag == HaDbFailureTagLogLogicallyInconsistent ) { - AtomicExchangeSet( (ULONG*)&pinst->m_grbitHaFailureTags, (ULONG)bitHaPublishedLogLogicallyInconsistentTag ); + AtomicExchangeSet( (ULONG*)&pinstActual->m_grbitHaFailureTags, (ULONG)bitHaPublishedLogLogicallyInconsistentTag ); } } @@ -309,6 +310,8 @@ void OSUHAEmitFailureTag_( { pcritInstActual->Leave(); } + + #undef pinst } #endif From 005c2645a89b702334976e0c11deeed679d2bb94 Mon Sep 17 00:00:00 2001 From: Anil Ruia Date: Wed, 16 Nov 2022 16:51:30 +0000 Subject: [PATCH 101/102] Do not try to delete preceding RCEs when the current RCE is a Delta RCE as the preceding one can be an uncommitted Delta RCE [Substrate:a12c014d42b9048e5e443d5d42aa004b78fb7911] --- dev/ese/src/ese/ver.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/ese/src/ese/ver.cxx b/dev/ese/src/ese/ver.cxx index 916e1808..64a2ac01 100644 --- a/dev/ese/src/ese/ver.cxx +++ b/dev/ese/src/ese/ver.cxx @@ -6023,7 +6023,8 @@ ERR RCE::ErrPrepareToDeallocate( TRX trxOldest ) if ( PinstFromIfmp( m_ifmp )->FRecovering() ) { RCE *prceFirst = this; - while ( prceFirst->m_prcePrevOfNode != NULL ) + 
// With Delta RCEs, you can have uncommitted delta RCEs preceding it, so let them be. + while ( prceFirst->Oper() != operDelta && prceFirst->Oper() != operDelta64 && prceFirst->m_prcePrevOfNode != NULL ) { Assert( prceFirst->m_prcePrevOfNode->m_prceNextOfNode == prceFirst ); prceFirst = prceFirst->m_prcePrevOfNode; From a184d5dd19750cba58e8eccc92294b304899f3bb Mon Sep 17 00:00:00 2001 From: Alexandre Costa Date: Fri, 18 Nov 2022 16:54:20 +0000 Subject: [PATCH 102/102] Hybrid Shrink phase 1: avoid breaking up contiguous leaf-page runs when moving data. **Summary** This change implements the ability to move multiple pages which are contiguous within an extent to a destination extent which is tailored to accommodate those pages without creating fragmentation, while also keeping the original contiguity aspect of that set of pages. This is a pre-requisite to Hybrid Shrink because we will be operating potentially on large portions of live databases, so we do not want to break existing contiguity, which would be particularly detrimental to HDD-hosted databases. The code will already start running as part of attach-time Shrink, when we come across leaf, non-space pages. Currently, we move those pages one by one without any awareness of contiguity. **Details** - Added new flight param JET_paramFlight_ContiguousExtentMoveShrinkEnabled to switch the feature on/off. By default, it'll be **off** in retail ESE and **on** in debug ESE. However, clients above ESE will start with it always **on** during the test pass, as well as in PDT and NAMPR00DG006. - Added new flight JET_paramFlight_HierarchicalSpaceAllocFlagsEnabled. In the process of developing this change, I found a bug where some space request options were not being passed along when space was requested from the parent object of the current tree. This parameter turns that fix on/off. It'll be checked-in with it always **on**, but I've decided to add the flight to turn it off in case it is needed.
- Changed wrnBTShallowTree to errBTShallowTree. The motivating factor was that the warning was being returned in cases where the requested operation was **not** actually performed, so it was semantically wrong, in addition to forcing code to always check specifically for that warning. - Implemented ErrBTContiguousExtentMove, which is the core of this change. - Updated the current attach-time Shrink code to use the new feature. - Renamed BOOKMARK_COPY to BOOKMARK_BUFFER and added some functionality. - Added new test case to cover new functionality. - Fixed a test case that broke due to the stricter requirement to move pages (contiguous destination extent). [Substrate:38dda6171b4f83a7419453edef8c6c8e7fe46e38] --- dev/ese/published/inc/jethdr.w | 7 +- dev/ese/src/_errstr/errdata.txt | 2 +- dev/ese/src/_res/jetmsg.mc | 2 + dev/ese/src/ese/_log/logredo.cxx | 2 +- dev/ese/src/ese/bt.cxx | 243 ++++++++++++++++----- dev/ese/src/ese/dbscan.cxx | 3 +- dev/ese/src/ese/dbshrink.cxx | 102 +++++++-- dev/ese/src/ese/fmp_test.cxx | 1 + dev/ese/src/ese/jetapi.cxx | 17 +- dev/ese/src/ese/lv.cxx | 6 +- dev/ese/src/ese/old.cxx | 13 +- dev/ese/src/ese/space.cxx | 306 +++++++++++++++------------ dev/ese/src/ese/sysparamtable.g.cxx | 8 +- dev/ese/src/inc/_space.hxx | 12 ++ dev/ese/src/inc/bt.hxx | 22 +- dev/ese/src/inc/daedef.hxx | 213 ++++++++++++++----- dev/ese/src/inc/fmp.hxx | 6 + dev/ese/src/inc/space.hxx | 3 +- dev/ese/src/noncore/interop/params.h | 5 +- 19 files changed, 669 insertions(+), 304 deletions(-) diff --git a/dev/ese/published/inc/jethdr.w b/dev/ese/published/inc/jethdr.w index 283746f1..25a45edb 100644 --- a/dev/ese/published/inc/jethdr.w +++ b/dev/ese/published/inc/jethdr.w @@ -3971,7 +3971,7 @@ typedef enum // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) -#define JET_paramFlight_SmoothIoTestPermillage 55 // The per mille of total (or one thousandths, or tenths of a percent) of IO should be made smooth. Ex(s): 995(/1000) = 99.5% smooth, 10(/1000) = 1%, etc. 0 = disabled. 
+#define JET_paramFlight_HierarchicalSpaceAllocFlagsEnabled 55 // Whether we want to pass the space allocation flags along when asking for space to the parent of an object. #define JET_paramElasticWaypointLatency 56 // Amount of extra elastic waypoint latency #define JET_paramFlight_SynchronousLVCleanup 57 // Perform synchronous cleanup (actual delete) of LVs instead of flag delete with cleanup happening later #define JET_paramFlight_RBSRevertIOUrgentLevel 58 // IO urgent level for reverting the databases using RBS. Used to decide how many outstanding I/Os will be allowed. @@ -4253,9 +4253,10 @@ typedef enum // end_PubEsent #define JET_paramFlight_UseCngAes256Implementation 221 // Whether to use the CNG based implementation (rather than CAPI based one) for AES256 encryption +#define JET_paramFlight_ContiguousExtentMoveShrinkEnabled 222 // Whether we want to move contiguous pages to a contiguous destination extent during DB Shrink. // begin_PubEsent -#define JET_paramMaxValueInvalid 222 // This is not a valid parameter. It can change from release to release! +#define JET_paramMaxValueInvalid 223 // This is not a valid parameter. It can change from release to release! // end_PubEsent #if ( JET_VERSION >= 0x0A01 ) @@ -6075,7 +6076,7 @@ typedef JET_ERR (JET_API * JET_PFNEMITLOGDATA)( // begin_PubEsent #define JET_errNTSystemCallFailed -334 // A call to the operating system failed // end_PubEsent -#define wrnBTShallowTree 335 // BTree is only one or two levels deep +#define errBTShallowTree -335 // BTree is only one or two levels deep #define errBTMergeNotSynchronous -336 // Multiple threads attempting to perform merge/split on same page (likely OLD vs. 
RCEClean) #define wrnSPReservedPages 337 // space manager reserved pages for future space tree splits // begin_PubEsent diff --git a/dev/ese/src/_errstr/errdata.txt b/dev/ese/src/_errstr/errdata.txt index 169bc5d7..165a2071 100644 --- a/dev/ese/src/_errstr/errdata.txt +++ b/dev/ese/src/_errstr/errdata.txt @@ -95,7 +95,7 @@ INTERNAL_ERR( -332, Unknown, errSPOutOfOwnExtCacheSpace ) INTERNAL_WRN( 333, Unknown, wrnBTMultipageOLC ) EXTERNAL_ERR( -334, Operation, JET_errNTSystemCallFailed ) // eseutil only today - INTERNAL_WRN( 335, Unknown, wrnBTShallowTree ) + INTERNAL_ERR( -335, Unknown, errBTShallowTree ) INTERNAL_ERR( -336, Unknown, errBTMergeNotSynchronous ) INTERNAL_WRN( 337, Unknown, wrnSPReservedPages ) EXTERNAL_ERR( -338, Corruption, JET_errBadParentPageLink ) diff --git a/dev/ese/src/_res/jetmsg.mc b/dev/ese/src/_res/jetmsg.mc index 69a4ed34..31e0d394 100644 --- a/dev/ese/src/_res/jetmsg.mc +++ b/dev/ese/src/_res/jetmsg.mc @@ -2524,6 +2524,7 @@ Pages shelved: %24 page(s).%n Pages unleaked: %25 page(s).%n Return code: %17%n Stop reason: %18%n +Available space below target: %34 bytes (%35 page(s)).%n Small-space trees converted: %27 tree(s).%n Root pages moved (regular, space): %28 page(s), %29 page(s).%n Strictly-internal pages moved (regular, space): %30 page(s), %31 page(s).%n @@ -2549,6 +2550,7 @@ Pages shelved: %24 page(s).%n Pages unleaked: %25 page(s).%n Error code: %17%n Stop reason: %18%n +Available space below target: %34 bytes (%35 page(s)).%n Small-space trees converted: %27 tree(s).%n Root pages moved (regular, space): %28 page(s), %29 page(s).%n Strictly-internal pages moved (regular, space): %30 page(s), %31 page(s).%n diff --git a/dev/ese/src/ese/_log/logredo.cxx b/dev/ese/src/ese/_log/logredo.cxx index 1f0e84aa..0f5b0def 100644 --- a/dev/ese/src/ese/_log/logredo.cxx +++ b/dev/ese/src/ese/_log/logredo.cxx @@ -5377,7 +5377,7 @@ ERR LOG::ErrLGRICheckRedoAttachDb( const BOOL fMatchingLoggedSignLog = ( 0 == memcmp( &pdbfilehdr->signLog, psignLogged, 
sizeof(SIGNATURE) ) ); // When we are recovering a dirty-and-patched database, it's possible that lGenMinRequired gets - // stalled due to pending redo map entries. When that happens and there are mulitple attach/detach + // stalled due to pending redo map entries. When that happens and there are multiple attach/detach // cycles before the redo map entries are resolved, we could have lgposAttach ahead of lGenMinRequired. // In that case, we need to reset lGenMinRequired and lgposAttach so that we are forced to re-attach // and rebuild the redo maps. Note that ErrIsamEndDatabaseIncrementalReseed() does something similar to diff --git a/dev/ese/src/ese/bt.cxx b/dev/ese/src/ese/bt.cxx index 2abe6172..24f5caf6 100644 --- a/dev/ese/src/ese/bt.cxx +++ b/dev/ese/src/ese/bt.cxx @@ -11313,6 +11313,7 @@ LOCAL ERR ErrBTIGetNewPages( FUCB *pfucb, SPLITPATH *psplitPathLeaf, DIRFLAG dir CpgDIRActiveSpaceRequestReserve( pfucb ) != cpgDIRReserveConsumed && ( ( psplit->fNewPageFlags & CPAGE::fPageLeaf ) || fVerticalToLeafSplit ) ) { + // If there is an active reserve request outstanding and this is a leaf page allocation, // indicate to space to consume the active reserve. 
fSPAllocFlags |= ( fSPContinuous | fSPUseActiveReserve ); @@ -13521,9 +13522,10 @@ ERR ErrBTIMultipageCleanup( const BOOL fRightMerges, __inout_opt PrereadInfo * const pPrereadInfo ) { - ERR err; + ERR err = JET_errSuccess; MERGEPATH *pmergePath = NULL; - PIBTraceContextScope tcScope = TcBTICreateCtxScope( pfucb, iorsBTMerge ); + PIBTraceContextScope tcScope = TcBTICreateCtxScope( pfucb, iorsBTMerge ); + BOOL fValidBmNext = fFalse; if ( pmergetype ) { @@ -13534,27 +13536,12 @@ ERR ErrBTIMultipageCleanup( { // btree is scheduled for deletion - don't bother attempting cleanup // - if ( NULL != pbmNext ) - { - pbmNext->key.suffix.SetCb( 0 ); - pbmNext->data.SetCb( 0 ); - } - - return JET_errSuccess; + goto HandleError; } // get path RIW latched // Call( ErrBTICreateMergePath( pfucb, bm, pgnoNull, fTrue, &pmergePath, pPrereadInfo ) ); - if ( wrnBTShallowTree == err ) - { - if ( NULL != pbmNext ) - { - pbmNext->key.suffix.SetCb( 0 ); - pbmNext->data.SetCb( 0 ); - } - goto HandleError; - } // check if merge conditions hold // @@ -13564,16 +13551,15 @@ ERR ErrBTIMultipageCleanup( if ( mergetypeEmptyTree == pmergePath->pmerge->mergetype ) { - if ( NULL != pbmNext ) - { - pbmNext->key.suffix.SetCb( 0 ); - pbmNext->data.SetCb( 0 ); - } - + // This always releases pmergePath. 
err = ErrBTIMergeEmptyTree( pfucb, pmergePath ); - return err; + pmergePath = NULL; + + goto HandleError; } + fValidBmNext = fTrue; + // release pages not involved in merge // BTIMergeReleaseUnneededPages( pmergePath ); @@ -13668,6 +13654,17 @@ ERR ErrBTIMultipageCleanup( HandleError: BTIReleaseMergePaths( pmergePath ); + + if ( ( pbmNext != NULL ) && ( !fValidBmNext || ( err < JET_errSuccess ) ) ) + { + pbmNext->Reset(); + } + + if ( err == errBTShallowTree ) + { + err = JET_errSuccess; + } + return err; } @@ -13933,7 +13930,7 @@ LOCAL ERR ErrBTICreateMergePath( FUCB *pfucb, { // tree is too shallow to bother doing merges on // - Error( ErrERRCheck( wrnBTShallowTree ) ); + Error( ErrERRCheck( errBTShallowTree ) ); } BOOL fLeftEdgeOfBtree = fTrue; @@ -13999,7 +13996,7 @@ LOCAL ERR ErrBTICreateMergePath( FUCB *pfucb, const MERGEPATH * const pmergePathParent = (*ppmergePath)->pmergePathParent; // if root page was also a leaf page or the internal page we're looking for, we would have - // err'd out above with wrnBTShallowTree + // err'd out above with errBTShallowTree Assert( NULL != pmergePathParent ); Assert( !( pcsr->Cpage().FRootPage() ) ); @@ -14084,15 +14081,13 @@ LOCAL ERR ErrBTICreateMergePath( FUCB *pfucb, #ifdef DEBUG if ( err >= JET_errSuccess ) { - if ( err != wrnBTShallowTree ) - { - Assert( !!(*ppmergePath)->csr.Cpage().FLeafPage() == !!fLeafPage ); - } - else - { - Assert( (*ppmergePath)->csr.Cpage().FRootPage() ); - } + Assert( !!(*ppmergePath)->csr.Cpage().FLeafPage() == !!fLeafPage ); } + else if ( err == errBTShallowTree ) + { + Assert( (*ppmergePath)->csr.Cpage().FRootPage() ); + } + #endif return err; @@ -14103,7 +14098,7 @@ LOCAL ERR ErrBTICreateMergePath( FUCB *pfucb, LOCAL VOID BTIMergeCopyNextBookmark( FUCB * const pfucb, MERGEPATH * const pmergePathLeaf, BOOKMARK * const pbmNext, - const BOOL fRightMerges ) + const BOOL fRightMerges ) // ================================================================ // // Copies next bookmark to seek for 
online defrag. @@ -14136,8 +14131,7 @@ LOCAL VOID BTIMergeCopyNextBookmark( FUCB * const pfucb, // if ( pcsr->Pgno() == pgnoNull ) { - pbmNext->key.suffix.SetCb( 0 ); - pbmNext->data.SetCb( 0 ); + pbmNext->Reset(); return; } @@ -15760,9 +15754,9 @@ LOCAL VOID BTIUpdatePagePointer( // ================================================================ LOCAL VOID BTIMovePageCopyNextBookmark( - _In_ const FUCB * const pfucb, + _In_ const FUCB * const pfucb, _In_ const MERGEPATH * const pmergePath, - __inout BOOKMARK * const pbmNext ) + _Inout_ BOOKMARK * const pbmNext ) // ================================================================ // // copies the bookmark from the right-hand page for a move @@ -15777,12 +15771,9 @@ LOCAL VOID BTIMovePageCopyNextBookmark( Assert( pbmNext ); Assert( NULL != pbmNext->key.suffix.Pv() ); - if( pgnoNull == pmergePath->pmerge->csrRight.Pgno()) + if ( pgnoNull == pmergePath->pmerge->csrRight.Pgno() ) { - pbmNext->key.prefix.SetCb( 0 ); - pbmNext->key.suffix.SetCb( 0 ); - pbmNext->data.SetCb( 0 ); - Assert( pbmNext->key.FNull() ); + pbmNext->Reset(); } else { @@ -16060,6 +16051,7 @@ LOCAL ERR ErrBTIPageMove( { Call( ErrBTIMergeLatchSiblingPages( pfucb, pmergePath ) ); } + Call( ErrBTIPageMoveAllocatePage( pfucb, pmergePath, fSPAllocFlags, dirflag ) ); BTIMergeReleaseUnneededPages( pmergePath ); @@ -16139,7 +16131,7 @@ ERR ErrBTPageMove( _In_ const PGNO pgnoSource, _In_ const BOOL fLeafPage, _In_ const ULONG fSPAllocFlags, - __inout BOOKMARK * const pbmNext ) + _Inout_ BOOKMARK * const pbmNext ) // ================================================================ { Assert( pfucb ); @@ -16160,17 +16152,6 @@ ERR ErrBTPageMove( } Call( ErrBTICreateMergePath( pfucb, bm, pgnoSource, fLeafPage, &pmergePath ) ); - if ( wrnBTShallowTree == err ) - { - if ( pbmNext ) - { - pbmNext->key.prefix.SetCb( 0 ); - pbmNext->key.suffix.SetCb( 0 ); - pbmNext->data.SetCb( 0 ); - } - - goto HandleError; - } Call( ErrBTINewMerge( pmergePath ) ); 
pmergePath->pmerge->mergetype = mergetypePageMove; @@ -16194,6 +16175,12 @@ ERR ErrBTPageMove( HandleError: BTIReleaseMergePaths( pmergePath ); Assert( !Pcsr( pfucb )->FLatched( ) ); + + if ( ( pbmNext != NULL ) && ( err < JET_errSuccess ) ) + { + pbmNext->Reset(); + } + return err; } @@ -16260,6 +16247,148 @@ VOID BTIPerformMerge( FUCB *pfucb, MERGEPATH *pmergePathLeaf ) } } +// ================================================================ +ERR ErrBTContiguousExtentMove( + _In_ FUCB * const pfucb, + _In_ const BOOKMARK& bm, + _In_ const PGNO pgnoSourceFirst, + _Out_ CPG * const pcpgMoved ) +// ================================================================ +{ + Assert( pfucb ); + Assert( !FFUCBSpace( pfucb ) ); + Assert( !Pcsr( pfucb )->FLatched() ); + Assert( !bm.key.FNull() ); + Assert( pgnoSourceFirst != pgnoNull ); + + ERR err = JET_errSuccess; + PIBTraceContextScope tcScope = TcBTICreateCtxScope( pfucb, iorsBTMerge ); + MERGEPATH * pmergePath = NULL; + EXTENTINFO extinfoOE; + BOOKMARK_BUFFER bmbCurr, bmbNext; + CPG cpgToMove = 0; + BOOL fActiveSpaceRequestReserveCreated = fFalse; + + // We cannot keep whole portions of the data tree, as well as the space trees, locked during this operation. + // Therefore, we are going to run the discovery code below using the extent returned previously and without + // any locks, and we will, later, stop the extent move operation. + // + + // Latch the path, from root to leaf. + Call( ErrBTICreateMergePath( pfucb, bm, pgnoSourceFirst, fTrue, &pmergePath ) ); + Assert( pmergePath->csr.Cpage().PgnoThis() == pgnoSourceFirst ); + + // Get OE extent that hosts this page. + Call( ErrSPGetOwningExtent( pfucb, pgnoSourceFirst, &extinfoOE ) ); + Assert( extinfoOE.FContainsPgno( pgnoSourceFirst ) ); + + // Find a contiguous run based on the pgnos found in the parent-of-leaf. + // Note that this will destroy the merge path, as the iline will be modified. 
+ for ( MERGEPATH * const pmergePathParent = pmergePath->pmergePathParent; + pmergePathParent->csr.ILine() < pmergePathParent->csr.Cpage().Clines(); + pmergePathParent->csr.IncrementILine() ) + { + NDGet( pfucb, &pmergePathParent->csr ); + + Assert( sizeof( PGNO ) == pfucb->kdfCurr.data.Cb() ); + const PGNO pgnoToMove = *( (UnalignedLittleEndian< PGNO > *)pfucb->kdfCurr.data.Pv() ); + Assert( pgnoToMove != pgnoNull ); + Assert( ( cpgToMove != 0 ) || ( pgnoToMove == pgnoSourceFirst ) ); // Cursor must be positioned in the first pgno. + Assert( ( cpgToMove != 1 ) || ( pgnoToMove == pmergePath->csr.Cpage().PgnoNext() ) ); // Second page is the right sibling of the first. + + // Consider it part of a run if the page is within the same extent. + if ( extinfoOE.FContainsPgno( pgnoToMove ) ) + { + cpgToMove++; + } + else + { + break; + } + } + Assert( cpgToMove > 0 ); + + // Release latch path. + BTIReleaseMergePaths( pmergePath ); + pmergePath = NULL; + + // If we have only one page to move, fallback to single page move. + // If we have multiple pages to move, reserve the exact space needed and move one by one. + // Note that, because we don't keep everything latched, things might have changed underneath us, so + // do not try to enforce that the page layout hasn't change. Otherwise, we could end up with fragmented + // extents if we often stop too early. The exception is for the first page, which can bail out before + // any space is allocated, if we pass in an expected pgno (pgnoSourceFirst, in this case). 
+ const BOOL fSinglePageMove = ( cpgToMove == 1 ); + Call( bmbCurr.ErrAllocAndCopyKeyData( bm.key, bm.data ) ); + Call( bmbNext.ErrAllocBuffer() ); + bmbNext.NullifyAndSetPvsToBuffer(); + for ( CPG cpgMoved = 0; ( cpgMoved < cpgToMove ) && !bmbCurr.Bm().FNull(); cpgMoved++ ) + { + const BOOL fFirstPage = ( cpgMoved == 0 ); + const BOOL fUseActiveSpaceRequestReserve = !fSinglePageMove; + const BOOL fCreateActiveSpaceRequestReserve = ( fUseActiveSpaceRequestReserve && fFirstPage ); + + if ( fCreateActiveSpaceRequestReserve ) + { + DIRSetActiveSpaceRequestReserve( pfucb, cpgToMove - 1 ); + fActiveSpaceRequestReserveCreated = fTrue; + } + + err = ErrBTPageMove( + pfucb, + bmbCurr.Bm(), + fFirstPage ? pgnoSourceFirst : pgnoNull, + fTrue, + fUseActiveSpaceRequestReserve ? + ( fSPUseActiveReserve | ( fCreateActiveSpaceRequestReserve ? ( fSPContinuous | fSPExactExtent ) : fSPNoFlags ) ) : + fSPNoFlags, + bmbNext.Pbm() ); + + Call( err ); + + ( *pcpgMoved )++; + + bmbNext.CopyInto( &bmbCurr ); + bmbNext.NullifyAndSetPvsToBuffer(); + } + +HandleError: + Assert( !Pcsr( pfucb )->FLatched() ); + + if ( fActiveSpaceRequestReserveCreated ) + { + DIRResetActiveSpaceRequestReserve( pfucb ); + } + + bmbCurr.FreeBuffer(); + bmbNext.FreeBuffer(); + BTIReleaseMergePaths( pmergePath ); + +#ifdef DEBUG + FMP * const pfmp = PfmpFromIfmp( pfucb->ifmp ); + if ( pfmp->FExclusiveBySession( pfucb->ppib ) ) + { + Expected( pfmp->FShrinkIsRunning() ); + Assert( err != JET_errRecordNotFound ); + Assert( err != errBTShallowTree ); + if ( err >= JET_errSuccess ) + { + Assert( *pcpgMoved > 0 ); + Assert( *pcpgMoved == cpgToMove ); + } + } +#endif + + // Because this function is best effort only, we'll return success if at least one page has been moved. 
+ if ( ( err > JET_errSuccess ) || ( ( err < JET_errSuccess ) && ( *pcpgMoved > 0 ) ) ) + { + err = JET_errSuccess; + } + + return err; +} + + // processes one page for merge or empty page operation // depending on the operation selection in pmergePath->flags // diff --git a/dev/ese/src/ese/dbscan.cxx b/dev/ese/src/ese/dbscan.cxx index ef0842a4..247c1150 100644 --- a/dev/ese/src/ese/dbscan.cxx +++ b/dev/ese/src/ese/dbscan.cxx @@ -3965,7 +3965,8 @@ ERR DBMScanObserverCleanup::ErrCleanupPrimaryPage_( CSR * const pcsr, DBMObjectC // In ErrBTISinglePageCleanup, ErrBTISPCDeleteNodes will nullify the node's data (replace with // a single byte NULL chSCRUBDBMaintEmptyPageLastNodeFill) but it can't remove the only node in // the page (b-tree pages can't be empty), and return MultipageOLC. Then ErrBTIMultipageCleanup will - // return wrnBTShallowTree without doing anything. + // return JET_errSuccess without doing anything (errBTShallowTree is returned from ErrBTICreateMergePath + // and translated into JET_errSuccess by ErrBTIMultipageCleanup). // // Avoid repeated replacing/scrubbing of case 1 pages diff --git a/dev/ese/src/ese/dbshrink.cxx b/dev/ese/src/ese/dbshrink.cxx index e8bf66e8..eb7158a8 100644 --- a/dev/ese/src/ese/dbshrink.cxx +++ b/dev/ese/src/ese/dbshrink.cxx @@ -4,25 +4,21 @@ #include "std.hxx" #include "errdata.hxx" #include "_bt.hxx" +#include "_space.hxx" #include "PageSizeClean.hxx" // Tracing. 
// -LOCAL ERR ErrSHKIShrinkEofTracingBegin( _In_ IFileSystemAPI * pfsapi, _In_ JET_PCWSTR wszDatabase, _Out_ CPRINTF** ppcprintfShrinkTraceRaw ) +LOCAL VOID SHKIShrinkEofTracingBegin( _In_ IFileSystemAPI * pfsapi, _In_ JET_PCWSTR wszDatabase, _Out_ CPRINTF** ppcprintfShrinkTraceRaw ) { - ERR err = JET_errSuccess; - - Call( ErrBeginDatabaseIncReseedTracing( pfsapi, wszDatabase, ppcprintfShrinkTraceRaw ) ); + (VOID)ErrBeginDatabaseIncReseedTracing( pfsapi, wszDatabase, ppcprintfShrinkTraceRaw ); (**ppcprintfShrinkTraceRaw)( "Beginning shrink pass.\r\n" ); - -HandleError: - return err; } -VOID SHKIShrinkEofTracingEnd( _Out_ CPRINTF** ppcprintfShrinkTraceRaw ) +LOCAL VOID SHKIShrinkEofTracingEnd( _Out_ CPRINTF** ppcprintfShrinkTraceRaw ) { if ( *ppcprintfShrinkTraceRaw ) { @@ -740,19 +736,34 @@ LOCAL ERR ErrSHKIMoveLastExtent( if ( FSPSpaceCatStrictlyInternal( spcatfCurrent ) || FSPSpaceCatStrictlyLeaf( spcatfCurrent ) ) { Assert( !FSPSpaceCatSmallSpace( spcatfCurrent ) ); + Assert( objidCurrent != objidNil ); + CPG cpgMoved = 0; fPageMove = fTrue; hrtPageMoveStart = HrtHRTCount(); const BOOL fSpacePage = FSPSpaceCatAnySpaceTree( spcatfCurrent ); + Assert( ( objidCurrent != objidSystemRoot ) || fSpacePage ); - err = ErrBTPageMove( + if ( fSpacePage || !FSPSpaceCatStrictlyLeaf( spcatfCurrent ) || !BoolParam( JET_paramFlight_ContiguousExtentMoveShrinkEnabled ) ) + { + err = ErrBTPageMove( fSpacePage ? 
pSpCatCtx->pfucbSpace : pSpCatCtx->pfucb, - *( pSpCatCtx->pbm ), + pSpCatCtx->pbmb->Bm(), pgnoCurrent, FSPSpaceCatStrictlyLeaf( spcatfCurrent ), fSPNoFlags, NULL ); + cpgMoved = 1; + } + else + { + err = ErrBTContiguousExtentMove( + pSpCatCtx->pfucb, + pSpCatCtx->pbmb->Bm(), + pgnoCurrent, + &cpgMoved ); + } // If this is a space tree, we may need to retry because reserving split buffers // for the move might have changed the page we're trying to move itself, which @@ -770,35 +781,38 @@ LOCAL ERR ErrSHKIMoveLastExtent( // exclusive access to the tree, so we must have been able to find the page // with the passed in bookmark in the expected conditions (i.e., strictly internal // or leaf). - AssertTrack( ( err != wrnBTShallowTree ) && ( err != JET_errRecordNotFound ), - OSFormat( ( err == wrnBTShallowTree ) ? + AssertTrack( ( err != errBTShallowTree ) && ( err != JET_errRecordNotFound ), + OSFormat( ( err == errBTShallowTree ) ? "ShrinkMoveUnexpectedShallowBt:0x%I32x" : "ShrinkMoveUnexpectedNotFound:0x%I32x", spcatfCurrent ) ); Call( err ); fMovedPage = fTrue; (*pcprintfShrinkTraceRaw)( "ShrinkMove[%I32u:%I32u:%d]\r\n", objidCurrent, pgnoCurrent, (int)spcatfCurrent ); - psems->cpgMoved++; + Assert( cpgMoved > 0 ); + psems->cpgMoved += cpgMoved; if ( FSPSpaceCatStrictlyLeaf( spcatfCurrent ) ) { if ( fSpacePage ) { - psems->cpgLeafSpaceMoved++; + Expected( cpgMoved == 1 ); + psems->cpgLeafSpaceMoved += cpgMoved; } else { - psems->cpgLeafMoved++; + psems->cpgLeafMoved += cpgMoved; } } else { + Expected( cpgMoved == 1 ); if ( fSpacePage ) { - psems->cpgInternalSpaceMoved++; + psems->cpgInternalSpaceMoved += cpgMoved; } else { - psems->cpgInternalMoved++; + psems->cpgInternalMoved += cpgMoved; } } continue; @@ -923,23 +937,22 @@ ERR ErrSHKShrinkDbFromEof( FMP* const pfmp = g_rgfmp + ifmp; INST* const pinst = pfmp->Pinst(); EXTENTINFO eiInitialOE, eiFinalOE; - QWORD cbSizeFileInitial = 0; - QWORD cbSizeFileFinal = 0; - QWORD cbSizeOwnedInitial = 0; - QWORD cbSizeOwnedFinal 
= 0; + QWORD cbSizeFileInitial = 0, cbSizeFileFinal = 0; + QWORD cbSizeOwnedInitial = 0, cbSizeOwnedFinal = 0; CPRINTF* pcprintfShrinkTraceRaw = NULL; const HRT hrtStarted = HrtHRTCount(); ShrinkExtMoveStats sems; ShrinkDoneReason sdr = sdrNone; PGNO pgnoFirstFromLastExtentShrunkPrev = pgnoNull; PGNO pgnoLastProcessed = pgnoNull; + PGNO pgnoShrinkTargetLast = pgnoMax; SpaceCategoryFlags spcatfLastProcessed = spcatfNone; HRT dhrtExtMaint = 0, dhrtFileTruncation = 0; BOOL fDbMayHaveChanged = fFalse; Assert( !pfmp->FIsTempDB() ); - Call( ErrSHKIShrinkEofTracingBegin( pinst->m_pfsapi, g_rgfmp[ ifmp ].WszDatabaseName(), &pcprintfShrinkTraceRaw ) ); + SHKIShrinkEofTracingBegin( pinst->m_pfsapi, g_rgfmp[ ifmp ].WszDatabaseName(), &pcprintfShrinkTraceRaw ); // First, delete any previously saved shrink archive files. if ( !BoolParam( pinst, JET_paramFlight_EnableShrinkArchiving ) ) @@ -966,6 +979,7 @@ ERR ErrSHKShrinkDbFromEof( // Open cursors to space trees. Call( ErrSPGetLastExtent( ppib, ifmp, &eiInitialOE ) ); + pgnoShrinkTargetLast = eiInitialOE.PgnoLast(); cbSizeOwnedInitial = OffsetOfPgno( eiInitialOE.PgnoLast() + 1 ); Assert( cbSizeOwnedInitial <= cbSizeFileInitial ); @@ -1028,6 +1042,11 @@ ERR ErrSHKShrinkDbFromEof( &sdr ), DoneWithDataMove ); + if ( pfmp->FPgnoShrinkTargetIsSet() ) + { + pgnoShrinkTargetLast = pfmp->PgnoShrinkTarget(); + } + Assert( pgnoFirstFromLastExtentTruncated != pgnoNull ); Assert( ( pgnoFirstFromLastExtentTruncated < pgnoFirstFromLastExtentTruncatedPrev ) || ( pgnoFirstFromLastExtentTruncatedPrev == pgnoNull ) ); @@ -1080,6 +1099,8 @@ ERR ErrSHKShrinkDbFromEof( &spcatfLastProcessed ), DoneWithDataMove ); Assert( pgnoFirstFromLastExtentMoved == pgnoFirstFromLastExtentTruncated ); + pgnoShrinkTargetLast = pfmp->FPgnoShrinkTargetIsSet() ? pfmp->PgnoShrinkTarget() : ( pgnoFirstFromLastExtentMoved - 1 ); + // We've got signaled to stop. 
if ( !pfmp->FShrinkIsActive() ) { @@ -1206,6 +1227,40 @@ ERR ErrSHKShrinkDbFromEof( // Emit event, except for when the database file was already small enough. if ( ( sdr != sdrReachedSizeLimit ) || fDbMayHaveChanged ) { + // Calculate total available space below the last shrink target. + CPG cpgAvailBelowShrinkTarget = 0; + if ( pgnoShrinkTargetLast == pgnoMax ) + { + EXTENTINFO ei; + if ( ErrSPGetLastExtent( ppib, ifmp, &ei ) >= JET_errSuccess ) + { + pgnoShrinkTargetLast = ei.PgnoLast(); + } + } + if ( pgnoShrinkTargetLast != pgnoMax ) + { + PIBTraceContextScope tcScope = ppib->InitTraceContextScope(); + tcScope->iorReason.SetIort( iortSpace ); + FUCB *pfucbRoot = pfucbNil, *pfucbRootAe = pfucbNil; + + if ( ( ErrBTIOpenAndGotoRoot( ppib, pgnoSystemRoot, ifmp, &pfucbRoot ) < JET_errSuccess ) || + !pfucbRoot->u.pfcb->FSpaceInitialized() || + ( ErrSPIOpenAvailExt( pfucbRoot, &pfucbRootAe ) < JET_errSuccess ) || + ( ErrSPIGetInfo( pfucbRootAe, pgnoShrinkTargetLast, &cpgAvailBelowShrinkTarget, NULL, NULL, NULL, 0, NULL, NULL, NULL ) < JET_errSuccess ) ) + { + cpgAvailBelowShrinkTarget = 0; + } + if ( pfucbRootAe != pfucbNil ) + { + BTClose( pfucbRootAe ); + } + if ( pfucbRoot != pfucbNil ) + { + pfucbRoot->pcsrRoot = pcsrNil; + BTClose( pfucbRoot ); + } + } + OSTraceSuspendGC(); const HRT dhrtElapsed = DhrtHRTElapsedFromHrtStart( hrtStarted ); const double dblSecTotalElapsed = DblHRTSecondsElapsed( dhrtElapsed ); @@ -1240,7 +1295,8 @@ ERR ErrSHKShrinkDbFromEof( OSFormatW( L"%lu", sems.cSmallSpaceTreesConverted ), OSFormatW( L"%d", sems.cpgRootMoved ), OSFormatW( L"%d", sems.cpgRootSpaceMoved ), OSFormatW( L"%d", sems.cpgInternalMoved ), OSFormatW( L"%d", sems.cpgInternalSpaceMoved ), - OSFormatW( L"%d", sems.cpgLeafMoved ), OSFormatW( L"%d", sems.cpgLeafSpaceMoved ) + OSFormatW( L"%d", sems.cpgLeafMoved ), OSFormatW( L"%d", sems.cpgLeafSpaceMoved ), + OSFormatW( L"%I64u", pfmp->CbOfCpg( cpgAvailBelowShrinkTarget ) ), OSFormatW( L"%d", cpgAvailBelowShrinkTarget ), }; 
UtilReportEvent( ( err < JET_errSuccess ) ? eventError : eventInformation, diff --git a/dev/ese/src/ese/fmp_test.cxx b/dev/ese/src/ese/fmp_test.cxx index b8d9df8d..25d500cf 100644 --- a/dev/ese/src/ese/fmp_test.cxx +++ b/dev/ese/src/ese/fmp_test.cxx @@ -409,6 +409,7 @@ JETUNITTEST( FMP, NewAndWriteLatch ) pfmp->SetPgnoShrinkTarget( 10 ); CHECK( pfmp->FShrinkIsActive() ); CHECK( pfmp->FPgnoShrinkTargetIsSet() ); + CHECK( 10 == pfmp->PgnoShrinkTarget() ); CHECK( !pfmp->FBeyondPgnoShrinkTarget( pgnoNull ) ); CHECK( !pfmp->FBeyondPgnoShrinkTarget( pgnoNull, 0 ) ); CHECK( !pfmp->FBeyondPgnoShrinkTarget( pgnoNull, 1 ) ); diff --git a/dev/ese/src/ese/jetapi.cxx b/dev/ese/src/ese/jetapi.cxx index 754e588d..6772b8e7 100644 --- a/dev/ese/src/ese/jetapi.cxx +++ b/dev/ese/src/ese/jetapi.cxx @@ -3229,21 +3229,10 @@ class CInstanceFileSystemConfiguration : public CDefaultFileSystemConfiguration if ( m_permillageSmoothIo == dwMax ) { // Exs: 999� = 99.9% Smooth, 990� = 99.0% Smooth, 900� = 90.0% Smooth. 
Debug default = 0.2% - ULONG permillageSmoothIo = OnDebugOrRetail( 2, CDefaultFileSystemConfiguration::PermillageSmoothIo() ); - - if ( m_pinst ) - { - if ( !FDefaultParam( m_pinst, JET_paramFlight_SmoothIoTestPermillage ) ) - { - permillageSmoothIo = (ULONG)UlParam( m_pinst, JET_paramFlight_SmoothIoTestPermillage ); - } - } - Assert( permillageSmoothIo != dwMax ); - - m_permillageSmoothIo = permillageSmoothIo; + m_permillageSmoothIo = OnDebugOrRetail( 2, CDefaultFileSystemConfiguration::PermillageSmoothIo() ); + Assert( m_permillageSmoothIo != dwMax ); } - Assert( m_permillageSmoothIo != dwMax ); return m_permillageSmoothIo; } @@ -7616,6 +7605,8 @@ const #define JET_paramFlight_RBSCleanupEnabledDEFAULT OnDebugOrRetail( fTrue, fFalse ) +#define JET_paramFlight_ContiguousExtentMoveShrinkEnabledDEFAULT OnDebugOrRetail( fTrue, fFalse ) + #define JET_paramFlight_UseCngAes256ImplementationDEFAULT OnDebugOrRetail( fTrue, fFalse ) // ================================================================ diff --git a/dev/ese/src/ese/lv.cxx b/dev/ese/src/ese/lv.cxx index ac45a0a6..d0405048 100644 --- a/dev/ese/src/ese/lv.cxx +++ b/dev/ese/src/ese/lv.cxx @@ -3095,7 +3095,7 @@ INLINE ERR ErrLVAppendChunks( if ( CpgDIRActiveSpaceRequestReserve( pfucbLV ) == cpgDIRReserveConsumed ) { // yay, we allocated contiguous pages for the LV. Turn off computations of LV reserve required. - DIRSetActiveSpaceRequestReserve( pfucbLV, 0 ); + DIRResetActiveSpaceRequestReserve( pfucbLV ); cpgRequiredReserve = 0; } @@ -3118,7 +3118,7 @@ INLINE ERR ErrLVAppendChunks( Assert( CpgDIRActiveSpaceRequestReserve( pfucbLV ) != cpgDIRReserveConsumed ); - DIRSetActiveSpaceRequestReserve( pfucbLV, 0 ); + DIRResetActiveSpaceRequestReserve( pfucbLV ); return err; } @@ -5668,7 +5668,7 @@ ERR ErrRECICreateLvRootAndChunks( // fits on a page. 
*pcpgLvSpaceRequired = 0; } - DIRSetActiveSpaceRequestReserve( pfucbLV, 0 ); + DIRResetActiveSpaceRequestReserve( pfucbLV ); } Assert( CpgDIRActiveSpaceRequestReserve( pfucbLV ) == 0 ); diff --git a/dev/ese/src/ese/old.cxx b/dev/ese/src/ese/old.cxx index 7d5f86bc..65603a80 100644 --- a/dev/ese/src/ese/old.cxx +++ b/dev/ese/src/ese/old.cxx @@ -2043,7 +2043,7 @@ LOCAL ERR ErrOLDDefragOneTree( err = ErrBTIMultipageCleanup( pfucb, bmStart, &bmNext, preccheck, &mergetype, fTrue ); BTUp( pfucb ); - if ( err < 0 ) + if ( err < JET_errSuccess ) { // if out of version store, try once to clean up if ( ( JET_errVersionStoreOutOfMemory == err || JET_errVersionStoreOutOfMemoryAndCleanupTimedOut == err ) @@ -5323,13 +5323,18 @@ ERR CTableDefragment::ErrPerformOneMerge_( PrereadInfo * const pPrereadInfo ) if ( mergetypeNone == mergetype || mergetypePartialLeft == mergetype ) { err = ErrBTPageMove( m_pfucbToDefrag, bmCurr, pgnoNull, fTrue, fSPContinuous, &bmNext ); - Call( err ); - - if ( err != wrnBTShallowTree ) + + if ( err >= JET_errSuccess ) { m_pold2Status->IncrementCpgMoved(); PERFOpt( cOLDPagesMoved.Inc( pinst ) ); } + else if ( err == errBTShallowTree ) + { + err = JET_errSuccess; + } + + Call( err ); } m_pold2Status->SetBookmark( bmNext ); diff --git a/dev/ese/src/ese/space.cxx b/dev/ese/src/ese/space.cxx index 0583cd59..4b7d5210 100644 --- a/dev/ese/src/ese/space.cxx +++ b/dev/ese/src/ese/space.cxx @@ -309,6 +309,12 @@ const CHAR * SzSpaceTreeType( const FUCB * const pfucb ) // class CSPExtentInfo; +LOCAL ERR ErrSPIFindExtOE( + __inout PIB * ppib, + _In_ FCB * pfcb, + _In_ const PGNO pgnoFirst, + _Out_ CSPExtentInfo * pcspoext ); + LOCAL ERR ErrSPIAddFreedExtent( FUCB *pfucb, FUCB *pfucbAE, @@ -334,7 +340,6 @@ LOCAL ERR ErrSPIGetFsSe( FUCB * const pfucbAE, const CPG cpgReq, const CPG cpgMin, - const ULONG fSPFlags, const BOOL fExact = fFalse, const BOOL fPermitAsyncExtension = fTrue, const BOOL fMayViolateMaxSize = fFalse ); @@ -377,17 +382,6 @@ LOCAL ERR 
ErrSPIUnshelvePagesInRange( const PGNO pgnoFirst, const PGNO pgnoLast ); -LOCAL ERR ErrSPIGetInfo( - FUCB *pfucb, - CPG *pcpgTotal, - CPG *pcpgReserved, - CPG *pcpgShelved, - INT *piext, - INT cext, - EXTENTINFO *rgext, - INT *pcextSentinelsRemaining, - CPRINTF * const pcprintf ); - #ifdef EXPENSIVE_INLINE_EXTENT_PAGE_COUNT_CACHE_VALIDATION // This function does very expensive validation of the value in the Extent Page // Count Cache by counting space tree pages inline with other space operations. @@ -1950,7 +1944,7 @@ class CSPExtentNodeKDF { class CSPExtentInfo; - ERR ErrConsumeSpace( _In_ const PGNO pgnoConsume, _In_ const CPG cpgConsume = 1 ) + ERR ErrConsumeSpace( _In_ const PGNO pgnoConsume, _In_ const CPG cpgConsume, _In_ const BOOL fDeleteInsertionMarker ) { ASSERT_VALID( this ); @@ -1970,8 +1964,9 @@ class CSPExtentNodeKDF { Assert( m_spextkey.FValid( m_eSpExtType, SPEXTKEY::fValidateData ) ); - if ( m_spextkey.SppPool() != spp::ContinuousPool && - CpgExtent() == 0 ) + // Leave insertion marker behind for continuous pools, except if we were working + // with explicit reservation. + if ( ( CpgExtent() == 0 ) && ( ( m_spextkey.SppPool() != spp::ContinuousPool ) || fDeleteInsertionMarker ) ) { m_fShouldDeleteNode = fTrue; } @@ -2411,6 +2406,37 @@ ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTIN } +// Gets the FUCB-level extent that owns a specific pgno. +// Assumes that the caller guarantees that the page is known to be owned +// by the FUCB provided. 
+// +ERR ErrSPGetOwningExtent( _In_ FUCB * pfucb, _In_ const PGNO pgno, _Out_ EXTENTINFO * pextinfo ) +{ + ERR err = JET_errSuccess; + + Assert( pfucbNil != pfucb ); + Assert( !FSPIIsSmall( pfucb->u.pfcb ) ); + Assert( pfucb->u.pfcb->FSpaceInitialized() ); + Assert( pfucb->u.pfcb->PgnoOE() != pgnoNull ); + + CSPExtentInfo speiOE; + err = ErrSPIFindExtOE( pfucb->ppib, pfucb->u.pfcb, pgno, &speiOE ); + if ( ( err == JET_errNoCurrentRecord ) || ( err == JET_errRecordNotFound ) || + ( ( err >= JET_errSuccess ) && ( !speiOE.FIsSet() || !speiOE.FContains( pgno ) || ( speiOE.CpgExtent() <= 0 ) ) ) ) + { + FireWall( "GetOwningExtNoOwned" ); + Error( ErrERRCheck( JET_errSPOwnExtCorrupted ) ); + } + Call( err ); + + pextinfo->pgnoLastInExtent = speiOE.PgnoLast(); + pextinfo->cpgExtent = speiOE.CpgExtent(); + +HandleError: + return err; +} + + // Validate I have not unintentionally changed SPACE_HEADER size. C_ASSERT( sizeof(SPACE_HEADER) == 16 ); @@ -4418,10 +4444,10 @@ VOID SPFreeSpaceCatCtx( _Inout_ SpaceCatCtx** const ppSpCatCtx ) pSpCatCtx->pfucbParent = pfucbNil; pSpCatCtx->pfucb = pfucbNil; - if ( pSpCatCtx->pbm != NULL ) + if ( pSpCatCtx->pbmb != NULL ) { - delete pSpCatCtx->pbm; - pSpCatCtx->pbm = NULL; + delete pSpCatCtx->pbmb; + pSpCatCtx->pbmb = NULL; } delete pSpCatCtx; @@ -4480,7 +4506,7 @@ ERR ErrSPIGetSpaceCategoryObject( BOOL fPageLatched = fFalse; KEYDATAFLAGS kdf; SpaceCatCtx* pSpCatCtx = NULL; - BOOKMARK_COPY* pbm = NULL; + BOOKMARK_BUFFER* pbmb = NULL; Assert( objid != objidNil ); Assert( objid != objidParent ); @@ -4498,7 +4524,7 @@ ERR ErrSPIGetSpaceCategoryObject( *ppSpCatCtx = NULL; Alloc( pSpCatCtx = new SpaceCatCtx ); - Alloc( pbm = new BOOKMARK_COPY ); + Alloc( pbmb = new BOOKMARK_BUFFER ); // First, determine the pgnoFDP and initialize cursors. 
// @@ -4817,7 +4843,7 @@ ERR ErrSPIGetSpaceCategoryObject( goto HandleError; } - Call( pbm->ErrCopyKeyData( kdf.key, kdf.data ) ); + Call( pbmb->ErrAllocAndCopyKeyData( kdf.key, kdf.data ) ); } else { @@ -4834,7 +4860,7 @@ ERR ErrSPIGetSpaceCategoryObject( goto HandleError; } - Call( pbm->ErrCopyKey( kdf.key ) ); + Call( pbmb->ErrAllocAndCopyKey( kdf.key ) ); } // Release latch before navigating. @@ -4854,7 +4880,7 @@ ERR ErrSPIGetSpaceCategoryObject( } // Try the tree itself. - err = ErrBTContainsPage( pfucb, *pbm, pgno, fLeafPage ); + err = ErrBTContainsPage( pfucb, pbmb->Bm(), pgno, fLeafPage); if ( err >= JET_errSuccess ) { spcatf |= ( fLeafPage ? spcatfStrictlyLeaf : spcatfStrictlyInternal ); @@ -4877,7 +4903,7 @@ ERR ErrSPIGetSpaceCategoryObject( spcatf = spcatfInconsistent; goto HandleError; } - err = ErrBTContainsPage( pfucbSpace, *pbm, pgno, fLeafPage ); + err = ErrBTContainsPage( pfucbSpace, pbmb->Bm(), pgno, fLeafPage); if ( err >= JET_errSuccess ) { spcatf |= ( spcatfSpaceAE | ( fLeafPage ? spcatfStrictlyLeaf : spcatfStrictlyInternal ) ); @@ -4901,7 +4927,7 @@ ERR ErrSPIGetSpaceCategoryObject( spcatf = spcatfInconsistent; goto HandleError; } - err = ErrBTContainsPage( pfucbSpace, *pbm, pgno, fLeafPage ); + err = ErrBTContainsPage( pfucbSpace, pbmb->Bm(), pgno, fLeafPage); if ( err >= JET_errSuccess ) { spcatf |= ( spcatfSpaceOE | ( fLeafPage ? spcatfStrictlyLeaf : spcatfStrictlyInternal ) ); @@ -4974,10 +5000,10 @@ ERR ErrSPIGetSpaceCategoryObject( if ( ( err >= JET_errSuccess ) && !FSPSpaceCatStrictlyInternal( spcatf ) && !FSPSpaceCatStrictlyLeaf( spcatf ) && - ( pbm != NULL ) ) + ( pbmb != NULL ) ) { - delete pbm; - pbm = NULL; + delete pbmb; + pbmb = NULL; } // Fill out the context struct, either to clean it up or return it. 
@@ -4986,14 +5012,14 @@ ERR ErrSPIGetSpaceCategoryObject( pSpCatCtx->pfucbParent = pfucbParent; pSpCatCtx->pfucb = pfucb; pSpCatCtx->pfucbSpace = pfucbSpace; - pSpCatCtx->pbm = pbm; + pSpCatCtx->pbmb = pbmb; } else { Assert( pfucbParent == pfucbNil ); Assert( pfucb == pfucbNil ); Assert( pfucbSpace == pfucbNil ); - Assert( pbm == NULL ); + Assert( pbmb == NULL ); } if ( err >= JET_errSuccess ) @@ -5544,7 +5570,7 @@ ERR ErrSPGetSpaceCategory( } // We must have a bookmark if this is an internal or leaf page. - Assert( !FSPSpaceCatStrictlyInternal( spcatf ) && !FSPSpaceCatStrictlyLeaf( spcatf ) || ( pSpCatCtx->pbm != NULL ) ); + Assert( !FSPSpaceCatStrictlyInternal( spcatf ) && !FSPSpaceCatStrictlyLeaf( spcatf ) || ( pSpCatCtx->pbmb != NULL ) ); } else { @@ -6157,7 +6183,6 @@ LOCAL ERR ErrSPIGetExt( pfucbAE, *pcpgReq, cpgMin, - fSPFlags & ( fSPSplitting | fSPExactExtent ), fFalse, // fExact fTrue, // fPermitAsyncExtension fMayViolateMaxSize ) ); @@ -6238,7 +6263,7 @@ LOCAL ERR ErrSPIGetExt( CSPExtentNodeKDF spAdjustedSize( SPEXTKEY::fSPExtentTypeAE, cspaei.PgnoLast(), cspaei.CpgExtent(), spp::AvailExtLegacyGeneralPool ); OnDebug( const PGNO pgnoLastBefore = cspaei.PgnoLast() ); - Call( spAdjustedSize.ErrConsumeSpace( *ppgnoFirst, *pcpgReq ) ); + Call( spAdjustedSize.ErrConsumeSpace( *ppgnoFirst, *pcpgReq, fFalse /* fDeleteInsertionMarker */ ) ); Assert( spAdjustedSize.CpgExtent() > 0 ); Assert( pgnoLastBefore == cspaei.PgnoLast() ); @@ -6687,7 +6712,6 @@ ERR ErrSPIAEFindPage( switch ( err ) { - default: Assert( err < JET_errSuccess ); Assert( err != JET_errNoCurrentRecord ); @@ -6931,7 +6955,7 @@ ERR ErrSPIAEGetExtentAndPage( } else { - Call( ErrSPIGetFsSe( pfucb, pfucbAE, cpgRequest, cpgRequest, fSPFlags ) ); + Call( ErrSPIGetFsSe( pfucb, pfucbAE, cpgRequest, cpgRequest ) ); } Assert( Pcsr( pfucbAE )->FLatched() ); @@ -6953,14 +6977,16 @@ ERR ErrSPIAEGetContinuousPage( __inout FUCB * const pfucb, // needed for ErrSPIAEGetExtentAndPage() __inout FUCB * const pfucbAE, 
_In_ const PGNO pgnoLast, - _In_ const CPG cpgReserve, - _In_ const BOOL fHardReserve, // ensure reserve, even if next contiguous page is available. + _In_ const CPG cpgAddlReserve, + _In_ const BOOL fSPAllocFlags, _Out_ CSPExtentInfo * pspaeiAlloc ) { - ERR err = JET_errSuccess; - FCB * const pfcb = pfucbAE->u.pfcb; - CPG cpgEscalatingRequest = 0; + ERR err = JET_errSuccess; + FCB * const pfcb = pfucbAE->u.pfcb; + const CPG cpgFullReserveRequest = cpgAddlReserve ? ( cpgAddlReserve + 1 ) : 0; + CPG cpgEscalatingRequest = 0; + const BOOL fUseReserve = ( fSPAllocFlags & fSPUseActiveReserve ) != 0; Assert( pfucb ); Assert( pfucbAE ); @@ -6976,7 +7002,6 @@ ERR ErrSPIAEGetContinuousPage( // if ( cpgEscalatingRequest ) { - // We should have had success or cpgEscalatingRequest would not be set. CallS( err ); Assert( Pcsr( pfucbAE )->FLatched() ); @@ -6997,19 +7022,18 @@ ERR ErrSPIAEGetContinuousPage( Assert( !Pcsr( pfucbAE )->FLatched() ); - Call( err ); // materialize the ErrBTFlagDelete() error ... + Call( err ); // materialize the ErrSPIWrappedBTFlagDelete() error ... 
err = ErrERRCheck( errSPNoSpaceForYou ); - } else { cpgEscalatingRequest = 1; } - if ( fHardReserve && + if ( fUseReserve && err >= JET_errSuccess && - pspaeiAlloc->CpgExtent() < cpgReserve ) + pspaeiAlloc->CpgExtent() < cpgFullReserveRequest ) { pspaeiAlloc->Unset(); BTUp( pfucbAE ); @@ -7017,14 +7041,17 @@ ERR ErrSPIAEGetContinuousPage( err = ErrERRCheck( errSPNoSpaceForYou ); } - if ( errSPNoSpaceForYou == err ) + if ( errSPNoSpaceForYou != err ) + { + Call( err ); + } + else { - if ( cpgEscalatingRequest == 1 ) { - if ( cpgReserve ) + if ( cpgAddlReserve ) { - cpgEscalatingRequest += cpgReserve; + cpgEscalatingRequest += cpgAddlReserve; } else { @@ -7032,13 +7059,12 @@ ERR ErrSPIAEGetContinuousPage( } } - if ( fHardReserve && - cpgReserve != 0 && - cpgReserve != cpgEscalatingRequest ) + if ( fUseReserve && + cpgAddlReserve != 0 && + cpgEscalatingRequest != cpgFullReserveRequest ) { - const CPG cpgFullReserveRequest = ( cpgReserve + 1 ); // Made this strict, but we could entertain that the request is only 10% wastage or something. - if ( ( cpgEscalatingRequest % cpgFullReserveRequest ) != 0 ) + if ( ( fSPExactExtent & fSPAllocFlags ) || ( ( cpgEscalatingRequest % cpgFullReserveRequest ) != 0 ) ) { cpgEscalatingRequest = cpgFullReserveRequest; } @@ -7050,20 +7076,20 @@ ERR ErrSPIAEGetContinuousPage( // Note: we don't want ErrSPIGetSe() to resize our request, we've already decided // on a good size, so don't pass fSPOriginatingRequest. + const BOOL fHierarchicalSpaceAllocFlags = BoolParam( JET_paramFlight_HierarchicalSpaceAllocFlagsEnabled ); Call( ErrSPIAEGetExtentAndPage( pfucb, pfucbAE, spp::ContinuousPool, cpgEscalatingRequest, - fSPSplitting, + fSPSplitting | ( fHierarchicalSpaceAllocFlags ? 
fSPAllocFlags : fSPNoFlags ), pspaeiAlloc ) ); - + Assert( !fHierarchicalSpaceAllocFlags || !( fSPExactExtent & fSPAllocFlags ) || ( pspaeiAlloc->CpgExtent() == cpgFullReserveRequest ) ); } // We should have succeeded or have latched some space // CallS( err ); - Call( err ); Assert( Pcsr( pfucbAE )->FLatched() ); Assert( pspaeiAlloc->SppPool() == spp::ContinuousPool ); @@ -7149,7 +7175,7 @@ ERR ErrSPIAEGetPage( _In_ PGNO pgnoLast, __inout PGNO * ppgnoAlloc, _In_ const BOOL fSPAllocFlags, - _In_ const CPG cpgReserve + _In_ const CPG cpgAddlReserve ) { ERR err = JET_errSuccess; @@ -7174,7 +7200,7 @@ ERR ErrSPIAEGetPage( if ( fSPContinuous & fSPAllocFlags ) { - Call( ErrSPIAEGetContinuousPage( pfucb, pfucbAE, pgnoLast, cpgReserve, fSPAllocFlags & fSPUseActiveReserve, &cspaeiAlloc ) ); + Call( ErrSPIAEGetContinuousPage( pfucb, pfucbAE, pgnoLast, cpgAddlReserve, fSPAllocFlags, &cspaeiAlloc ) ); } else { @@ -7221,7 +7247,7 @@ ERR ErrSPIAEGetPage( cspaeiAlloc.CpgExtent(), cspaeiAlloc.SppPool() ); - Call( spAdjustedAvail.ErrConsumeSpace( cspaeiAlloc.PgnoFirst() ) ); + Call( spAdjustedAvail.ErrConsumeSpace( cspaeiAlloc.PgnoFirst(), 1, ( fSPAllocFlags & fSPUseActiveReserve ) != 0 ) ); if ( spAdjustedAvail.FDelete() ) { @@ -7321,9 +7347,11 @@ ERR ErrSPGetPage( // check for valid input // Assert( ppgnoAlloc != NULL ); - Assert( 0 == ( fSPAllocFlags & ~fMaskSPGetPage ) ); + Assert( 0 == ( fSPAllocFlags & ~fMaskSPGetPage ) ); // only valid options. + Assert( 0 == ( fSPAllocFlags & fSPUseActiveReserve ) || 0 != ( fSPAllocFlags & fSPContinuous ) || cpgDIRReserveConsumed == CpgDIRActiveSpaceRequestReserve( pfucb ) ); // fSPUseActiveReserve requires fSPContinuous to actually reserve space. 
- CPG cpgAddlReserve = 0; + + CPG cpgAddlReserve = 0; if ( fSPAllocFlags & fSPNewExtent ) { cpgAddlReserve = 15; @@ -12096,7 +12124,6 @@ ERR ErrSPExtendDB( pfucbAE, cpgSEMin, cpgSEMin, - 0, fTrue, fPermitAsyncExtension ) ); @@ -14004,7 +14031,6 @@ LOCAL ERR ErrSPIGetFsSe( FUCB * const pfucbAE, const CPG cpgReq, const CPG cpgMin, - const ULONG fSPFlags, const BOOL fExact, const BOOL fPermitAsyncExtension, const BOOL fMayViolateMaxSize ) @@ -14725,8 +14751,9 @@ LOCAL ERR ErrSPIExtGetExtentListInfo( } -LOCAL ERR ErrSPIGetInfo( +ERR ErrSPIGetInfo( FUCB *pfucb, + const PGNO pgnoHighest, CPG *pcpgTotal, CPG *pcpgReserved, CPG *pcpgShelved, @@ -14787,94 +14814,99 @@ LOCAL ERR ErrSPIGetInfo( { const CSPExtentInfo cspext( pfucb ); - if( pcprintf ) + if ( ( pgnoHighest == pgnoNull ) || ( !cspext.FEmptyExtent() && ( pgnoHighest >= cspext.PgnoFirst() ) ) ) { - CPG cpgSparse = 0; + Expected( ( pgnoHighest == pgnoNull ) || ( pgnoHighest >= cspext.PgnoLast() ) ); - if ( !cspext.FEmptyExtent() ) + if ( pcprintf ) { - (void) ErrSPIGetSparseInfoRange( &g_rgfmp[ pfucb->ifmp ], cspext.PgnoFirst(), cspext.PgnoLast(), &cpgSparse ); - } + CPG cpgSparse = 0; - if( pgnoLastSeen != Pcsr( pfucb )->Pgno() ) - { - pgnoLastSeen = Pcsr( pfucb )->Pgno(); + if ( !cspext.FEmptyExtent() ) + { + (void) ErrSPIGetSparseInfoRange( &g_rgfmp[ pfucb->ifmp ], cspext.PgnoFirst(), cspext.PgnoLast(), &cpgSparse ); + } - ++cpgSeen; - } + if( pgnoLastSeen != Pcsr( pfucb )->Pgno() ) + { + pgnoLastSeen = Pcsr( pfucb )->Pgno(); - (*pcprintf)( "%30s: %s[%5d]:\t%6d-%6d (%3d) %s%s", - SzNameOfTable( pfucb ), - SzSpaceTreeType( pfucb ), - Pcsr( pfucb )->Pgno(), - cspext.FEmptyExtent() ? 0 : cspext.PgnoFirst(), - cspext.FEmptyExtent() ? cspext.PgnoMarker() : cspext.PgnoLast(), - cspext.CpgExtent(), - FNDDeleted( pfucb->kdfCurr ) ? " (DEL)" : "", - ( cspext.ErrCheckCorrupted( ) < JET_errSuccess ) ? 
" (COR)" : "" - ); - if ( cspext.FNewAvailFormat() ) - { - (*pcprintf)( " Pool: %d %s", - cspext.SppPool(), - cspext.FNewAvailFormat() ? "(fNewAvailFormat)" : "" + ++cpgSeen; + } + + (*pcprintf)( "%30s: %s[%5d]:\t%6d-%6d (%3d) %s%s", + SzNameOfTable( pfucb ), + SzSpaceTreeType( pfucb ), + Pcsr( pfucb )->Pgno(), + cspext.FEmptyExtent() ? 0 : cspext.PgnoFirst(), + cspext.FEmptyExtent() ? cspext.PgnoMarker() : cspext.PgnoLast(), + cspext.CpgExtent(), + FNDDeleted( pfucb->kdfCurr ) ? " (DEL)" : "", + ( cspext.ErrCheckCorrupted( ) < JET_errSuccess ) ? " (COR)" : "" ); + if ( cspext.FNewAvailFormat() ) + { + (*pcprintf)( " Pool: %d %s", + cspext.SppPool(), + cspext.FNewAvailFormat() ? "(fNewAvailFormat)" : "" + ); + } + if ( cpgSparse > 0 ) + { + (*pcprintf)( " cpgSparse: %3d", cpgSparse ); + } + (*pcprintf)( "\n" ); + + ++cRecords; + if( FNDDeleted( pfucb->kdfCurr ) ) + { + ++cRecordsDeleted; + } } - if ( cpgSparse > 0 ) + + if ( cspext.SppPool() != spp::ShelvedPool ) { - (*pcprintf)( " cpgSparse: %3d", cpgSparse ); + *pcpgTotal += cspext.CpgExtent(); } - (*pcprintf)( "\n" ); - ++cRecords; - if( FNDDeleted( pfucb->kdfCurr ) ) + if ( pcpgReserved && cspext.FNewAvailFormat() && + ( cspext.SppPool() == spp::ContinuousPool ) ) { - ++cRecordsDeleted; + *pcpgReserved += cspext.CpgExtent(); } - } - - if ( cspext.SppPool() != spp::ShelvedPool ) - { - *pcpgTotal += cspext.CpgExtent(); - } - - if ( pcpgReserved && cspext.FNewAvailFormat() && - ( cspext.SppPool() == spp::ContinuousPool ) ) - { - *pcpgReserved += cspext.CpgExtent(); - } - BOOL fSuppressExtent = fFalse; + BOOL fSuppressExtent = fFalse; - if ( pcpgShelved && cspext.FNewAvailFormat() && - ( cspext.SppPool() == spp::ShelvedPool ) ) - { - if ( cspext.PgnoLast() > g_rgfmp[ pfucb->ifmp ].PgnoLast() ) + if ( pcpgShelved && cspext.FNewAvailFormat() && + ( cspext.SppPool() == spp::ShelvedPool ) ) { - Assert( cspext.PgnoFirst() > g_rgfmp[ pfucb->ifmp ].PgnoLast() ); - *pcpgShelved += cspext.CpgExtent(); - } - else - { - 
fSuppressExtent = fTrue; + if ( cspext.PgnoLast() > g_rgfmp[ pfucb->ifmp ].PgnoLast() ) + { + Assert( cspext.PgnoFirst() > g_rgfmp[ pfucb->ifmp ].PgnoLast() ); + *pcpgShelved += cspext.CpgExtent(); + } + else + { + fSuppressExtent = fTrue; + } } - } - - if ( fExtentList && !fSuppressExtent ) - { - Assert( iext < cext ); - // be sure to leave space for the sentinels - // (if no more room, we still want to keep - // calculating page count - we just can't - // keep track of individual extents anymore - // - Assert( iext + *pcextSentinelsRemaining <= cext ); - if ( iext + *pcextSentinelsRemaining < cext ) + if ( fExtentList && !fSuppressExtent ) { - rgext[iext].pgnoLastInExtent = cspext.PgnoLast(); - rgext[iext].cpgExtent = cspext.CpgExtent(); - iext++; + Assert( iext < cext ); + + // be sure to leave space for the sentinels + // (if no more room, we still want to keep + // calculating page count - we just can't + // keep track of individual extents anymore + // + Assert( iext + *pcextSentinelsRemaining <= cext ); + if ( iext + *pcextSentinelsRemaining < cext ) + { + rgext[iext].pgnoLastInExtent = cspext.PgnoLast(); + rgext[iext].cpgExtent = cspext.CpgExtent(); + iext++; + } } } @@ -14882,7 +14914,9 @@ LOCAL ERR ErrSPIGetInfo( if ( err < 0 ) { if ( err != JET_errNoCurrentRecord ) + { goto HandleError; + } break; } } @@ -15670,6 +15704,7 @@ ERR ErrSPGetInfo( Call( ErrSPIGetInfo( pfucbSpace, + pgnoNull, pcpgOwnExtTotal, NULL, NULL, @@ -15827,6 +15862,7 @@ ERR ErrSPGetInfo( Call( ErrSPIGetInfo( pfucbSpace, + pgnoNull, pcpgAvailExtTotal, pcpgReservedExtTotal, pcpgShelvedExtTotal, @@ -16065,6 +16101,7 @@ ERR ErrSPIGetCpgOwnedAndAvail( Call( ErrSPIGetInfo( pfucbOE, + pgnoNull, pcpgOwnExtTotal, NULL, NULL, @@ -16076,6 +16113,7 @@ ERR ErrSPIGetCpgOwnedAndAvail( Call( ErrSPIGetInfo( pfucbAE, + pgnoNull, pcpgAvailExtTotal, NULL, NULL, diff --git a/dev/ese/src/ese/sysparamtable.g.cxx b/dev/ese/src/ese/sysparamtable.g.cxx index 4da5acbb..49ae6f6e 100644 --- 
a/dev/ese/src/ese/sysparamtable.g.cxx +++ b/dev/ese/src/ese/sysparamtable.g.cxx @@ -63,7 +63,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramDeleteOutOfRangeLogs, CJetParam::typeBoolean, 0, 0, 0, 1, 0, -1, 0), NORMAL_PARAM(JET_paramAccessDeniedRetryPeriod, CJetParam::typeInteger, 0, 1, 0, 0, 0, -1, 10000), NORMAL_PARAM(JET_paramEnableIndexCleanup, CJetParam::typeBoolean, 0, 0, 0, 0, 0, -1, 1), - NORMAL_PARAM(JET_paramFlight_SmoothIoTestPermillage, CJetParam::typeInteger, 1, 0, 0, 1, 0, 1000, 0), + NORMAL_PARAM(JET_paramFlight_HierarchicalSpaceAllocFlagsEnabled, CJetParam::typeBoolean, 1, 1, 0, 1, 0, -1, fTrue), NORMAL_PARAM(JET_paramElasticWaypointLatency, CJetParam::typeInteger, 1, 0, 0, 0, 0, 10, JET_paramElasticWaypointLatency_DEFAULT), NORMAL_PARAM(JET_paramFlight_SynchronousLVCleanup, CJetParam::typeBoolean, 1, 0, 0, 0, 0, -1, 0), NORMAL_PARAM(JET_paramFlight_RBSRevertIOUrgentLevel, CJetParam::typeInteger, 1, 0, 0, 0, 0, 127, 8), @@ -241,6 +241,7 @@ JetParam g_rgparamRaw[] = NORMAL_PARAM(JET_paramDeferredIndexPopulateRowsPerTransaction, CJetParam::typeInteger, 1, 0, 0, 0, 1, 2147483647, 1000), NORMAL_PARAM(JET_paramEnableBlockCacheDetach, CJetParam::typeBoolean, 1, 1, 1, 0, 0, 1, 0), NORMAL_PARAM(JET_paramFlight_UseCngAes256Implementation, CJetParam::typeBoolean, 0, 1, 0, 0, 0, 1, JET_paramFlight_UseCngAes256ImplementationDEFAULT), + NORMAL_PARAM(JET_paramFlight_ContiguousExtentMoveShrinkEnabled, CJetParam::typeBoolean, 1, 1, 0, 1, 0, -1, JET_paramFlight_ContiguousExtentMoveShrinkEnabledDEFAULT), ILLEGAL_PARAM(JET_paramMaxValueInvalid), }; @@ -302,7 +303,7 @@ static_assert( JET_paramEventLoggingLevel == 51, "The order of defintion for JET static_assert( JET_paramDeleteOutOfRangeLogs == 52, "The order of defintion for JET_paramDeleteOutOfRangeLogs in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); static_assert( JET_paramAccessDeniedRetryPeriod == 53, "The order of defintion for JET_paramAccessDeniedRetryPeriod in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramEnableIndexCleanup == 54, "The order of defintion for JET_paramEnableIndexCleanup in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( JET_paramFlight_SmoothIoTestPermillage == 55, "The order of defintion for JET_paramFlight_SmoothIoTestPermillage in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramFlight_HierarchicalSpaceAllocFlagsEnabled == 55, "The order of defintion for JET_paramFlight_HierarchicalSpaceAllocFlagsEnabled in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramElasticWaypointLatency == 56, "The order of defintion for JET_paramElasticWaypointLatency in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_SynchronousLVCleanup == 57, "The order of defintion for JET_paramFlight_SynchronousLVCleanup in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_RBSRevertIOUrgentLevel == 58, "The order of defintion for JET_paramFlight_RBSRevertIOUrgentLevel in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); @@ -469,4 +470,5 @@ static_assert( JET_paramEnableBlockCache == 218, "The order of defintion for JET static_assert( JET_paramDeferredIndexPopulateRowsPerTransaction == 219, "The order of defintion for JET_paramDeferredIndexPopulateRowsPerTransaction in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); static_assert( JET_paramEnableBlockCacheDetach == 220, "The order of defintion for JET_paramEnableBlockCacheDetach in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); static_assert( JET_paramFlight_UseCngAes256Implementation == 221, "The order of defintion for JET_paramFlight_UseCngAes256Implementation in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); -static_assert( JET_paramMaxValueInvalid == 222, "The order of defintion for JET_paramMaxValueInvalid in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramFlight_ContiguousExtentMoveShrinkEnabled == 222, "The order of defintion for JET_paramFlight_ContiguousExtentMoveShrinkEnabled in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." ); +static_assert( JET_paramMaxValueInvalid == 223, "The order of defintion for JET_paramMaxValueInvalid in sysparam.xml must follow the numerical ordering of its value (as defined in jethdr.w)." 
); diff --git a/dev/ese/src/inc/_space.hxx b/dev/ese/src/inc/_space.hxx index 26137eac..e4e4d929 100644 --- a/dev/ese/src/inc/_space.hxx +++ b/dev/ese/src/inc/_space.hxx @@ -146,6 +146,18 @@ ERR ErrSPIOpenOwnExt( FUCB *pfucb, FUCB **ppfucbOE ); +ERR ErrSPIGetInfo( + FUCB *pfucb, + const PGNO pgnoHighest, + CPG *pcpgTotal, + CPG *pcpgReserved, + CPG *pcpgShelved, + INT *piext, + INT cext, + EXTENTINFO *rgext, + INT *pcextSentinelsRemaining, + CPRINTF * const pcprintf ); + ERR ErrSPIGetExtentInfo( _In_ const FUCB *pfucb, _Out_ PGNO *ppgnoLast, diff --git a/dev/ese/src/inc/bt.hxx b/dev/ese/src/inc/bt.hxx index fb831eb2..eca5784c 100644 --- a/dev/ese/src/inc/bt.hxx +++ b/dev/ese/src/inc/bt.hxx @@ -312,13 +312,13 @@ struct PrereadInfo ERR ErrBTIMultipageCleanup( - FUCB * const pfucb, - const BOOKMARK& bm, - BOOKMARK * const pbmNext, - RECCHECK * const preccheck, - MERGETYPE * const pmergetype, - const BOOL fRightMerges, - __inout_opt PrereadInfo * const pPrereadInfo = NULL ); + FUCB * const pfucb, + const BOOKMARK& bm, + BOOKMARK * const pbmNext, + RECCHECK * const preccheck, + MERGETYPE * const pmergetype, + const BOOL fRightMerges, + __inout_opt PrereadInfo * const pPrereadInfo = NULL ); ERR ErrBTPageMove( _In_ FUCB * const pfucb, @@ -326,9 +326,15 @@ ERR ErrBTPageMove( _In_ const PGNO pgnoSource, _In_ const BOOL fLeafPage, _In_ const ULONG fSPAllocFlags, - __inout BOOKMARK * const pbmNext ); + _Inout_ BOOKMARK * const pbmNext ); VOID BTPerformPageMove( _In_ MERGEPATH * const pmergePath ); +ERR ErrBTContiguousExtentMove( + _In_ FUCB * const pfucb, + _In_ const BOOKMARK& bm, + _In_ const PGNO pgnoSourceFirst, + _Out_ CPG * const pcpgMoved ); + ERR ErrBTFindFragmentedRange( _In_ FUCB * const pfucb, _In_ const BOOKMARK& bmStart, diff --git a/dev/ese/src/inc/daedef.hxx b/dev/ese/src/inc/daedef.hxx index 4a173991..c47ac146 100644 --- a/dev/ese/src/inc/daedef.hxx +++ b/dev/ese/src/inc/daedef.hxx @@ -1258,7 +1258,7 @@ INLINE VOID DATA::CopyInto( DATA& dataDest ) const INLINE 
VOID DATA::Nullify() // ================================================================ { - m_pv = 0; + m_pv = NULL; m_cb = 0; } @@ -1559,6 +1559,9 @@ class BOOKMARK DATA data; VOID Nullify (); + VOID Reset (); + BOOL FNull () const; + #ifdef DEBUG public: VOID Invalidate (); @@ -1575,6 +1578,24 @@ INLINE VOID BOOKMARK::Nullify() data.Nullify(); } +// ================================================================ +INLINE VOID BOOKMARK::Reset() +// ================================================================ +{ + key.prefix.SetCb( 0 ); + key.suffix.SetCb( 0 ); + data.SetCb( 0 ); +} + +// ================================================================ +INLINE BOOL BOOKMARK::FNull() const +// ================================================================ +{ + Assert( data.FNull() || !key.FNull() ); + return key.FNull() && data.FNull(); +} + + #ifdef DEBUG // ================================================================ @@ -1599,124 +1620,216 @@ INLINE VOID BOOKMARK::AssertValid() const // ================================================================ -class BOOKMARK_COPY : public BOOKMARK +class BOOKMARK_BUFFER // ================================================================ // -// copy of a bookmark's content to the heap. +// a buffer to hold a bookmark's content in the heap. // //- { public: - BOOKMARK_COPY(); - ~BOOKMARK_COPY(); - ERR ErrCopyKey( const KEY& keySrc ); - ERR ErrCopyKeyData( const KEY& keySrc, const DATA& dataSrc ); - VOID FreeCopy(); + // ctor/dtor. + BOOKMARK_BUFFER(); + ~BOOKMARK_BUFFER(); + + // Disallow copy. + BOOKMARK_BUFFER& operator=( const BOOKMARK_BUFFER& ) = delete; + + public: + // Functional methods. 
+ ERR ErrAllocBuffer(); + VOID FreeBuffer(); + VOID CopyKeyData( const KEY& keySrc, const DATA& dataSrc ); + ERR ErrAllocAndCopyKey( const KEY& keySrc ); + ERR ErrAllocAndCopyKeyData( const KEY& keySrc, const DATA& dataSrc ); + VOID CopyInto( BOOKMARK_BUFFER* const pbmDest ) const; + VOID NullifyAndSetPvsToBuffer(); + const BOOKMARK& Bm() const; + BOOKMARK* Pbm(); private: + BOOKMARK m_bm; BYTE* m_pb; }; // ================================================================ -INLINE BOOKMARK_COPY::BOOKMARK_COPY() +INLINE BOOKMARK_BUFFER::BOOKMARK_BUFFER() // ================================================================ { m_pb = NULL; - OnDebug( Invalidate() ); + OnDebug( m_bm.Invalidate() ); } // ================================================================ -INLINE BOOKMARK_COPY::~BOOKMARK_COPY() +INLINE BOOKMARK_BUFFER::~BOOKMARK_BUFFER() // ================================================================ { - FreeCopy(); + FreeBuffer(); } // ================================================================ -INLINE ERR BOOKMARK_COPY::ErrCopyKey( const KEY& keySrc ) +INLINE ERR BOOKMARK_BUFFER::ErrAllocBuffer() // ================================================================ { - DATA dataSrc; - dataSrc.Nullify(); - return ErrCopyKeyData( keySrc, dataSrc ); + ERR err = JET_errSuccess; + + Assert( m_pb == NULL ); + Alloc( m_pb = (BYTE *)RESBOOKMARK.PvRESAlloc() ); + m_bm.Nullify(); + ASSERT_VALID( &m_bm ); + +HandleError: + return err; } // ================================================================ -INLINE ERR BOOKMARK_COPY::ErrCopyKeyData( const KEY& keySrc, const DATA& dataSrc ) +INLINE VOID BOOKMARK_BUFFER::FreeBuffer() +// ================================================================ +{ + OnDebug( m_bm.Invalidate() ); + if ( m_pb == NULL ) + { + return; + } + + RESBOOKMARK.Free( m_pb ); + m_pb = NULL; +} + +// ================================================================ +INLINE VOID BOOKMARK_BUFFER::CopyKeyData( const KEY& keySrc, const DATA& 
dataSrc ) // ================================================================ { - ERR err = JET_errSuccess; ASSERT_VALID( &keySrc ); ASSERT_VALID( &dataSrc ); - Assert( m_pb == NULL ); + Assert( m_pb != NULL ); + + m_bm.Nullify(); if ( keySrc.FNull() && dataSrc.FNull() ) { - Nullify(); - goto HandleError; + return; } // Make sure we are able to hold our key in RESBOOKMARK (never expected to fail). const DWORD_PTR cb = keySrc.Cb() + dataSrc.Cb(); DWORD_PTR cbMax = 0; CallS( RESBOOKMARK.ErrGetParam( JET_resoperSize, &cbMax ) ); - if ( cb > cbMax ) - { - Assert( fFalse ); - Error( ErrERRCheck( JET_errOutOfMemory ) ); - } + EnforceSz( cb <= cbMax, "BookmarkBufferTooSmall" ); - // Allocate memory. - Alloc( m_pb = (BYTE *)RESBOOKMARK.PvRESAlloc() ); - Nullify(); BYTE* pb = m_pb; // Copy key prefix. if ( !keySrc.prefix.FNull() ) { - key.prefix.SetPv( pb ); - key.prefix.SetCb( keySrc.prefix.Cb() ); - UtilMemCpy( pb, keySrc.prefix.Pv(), key.prefix.Cb() ); - pb += key.prefix.Cb(); + m_bm.key.prefix.SetPv( pb ); + m_bm.key.prefix.SetCb( keySrc.prefix.Cb() ); + UtilMemCpy( pb, keySrc.prefix.Pv(), m_bm.key.prefix.Cb() ); + pb += m_bm.key.prefix.Cb(); } // Copy key suffix. if ( !keySrc.suffix.FNull() ) { - key.suffix.SetPv( pb ); - key.suffix.SetCb( keySrc.suffix.Cb() ); - UtilMemCpy( pb, keySrc.suffix.Pv(), key.suffix.Cb() ); - pb += key.suffix.Cb(); + m_bm.key.suffix.SetPv( pb ); + m_bm.key.suffix.SetCb( keySrc.suffix.Cb() ); + UtilMemCpy( pb, keySrc.suffix.Pv(), m_bm.key.suffix.Cb() ); + pb += m_bm.key.suffix.Cb(); } // Copy data. 
if ( !dataSrc.FNull() ) { - data.SetPv( pb ); - data.SetCb( dataSrc.Cb() ); - UtilMemCpy( pb, dataSrc.Pv(), data.Cb() ); - pb += data.Cb(); + m_bm.data.SetPv( pb ); + m_bm.data.SetCb( dataSrc.Cb() ); + UtilMemCpy( pb, dataSrc.Pv(), m_bm.data.Cb() ); + pb += m_bm.data.Cb(); } Assert( (DWORD_PTR)( pb - m_pb ) == cb ); - ASSERT_VALID( this ); + ASSERT_VALID( &m_bm ); +} -HandleError: - return err; +// ================================================================ +INLINE ERR BOOKMARK_BUFFER::ErrAllocAndCopyKey( const KEY& keySrc ) +// ================================================================ +{ + DATA dataSrc; + dataSrc.Nullify(); + return ErrAllocAndCopyKeyData( keySrc, dataSrc ); } // ================================================================ -INLINE VOID BOOKMARK_COPY::FreeCopy() +INLINE ERR BOOKMARK_BUFFER::ErrAllocAndCopyKeyData( const KEY& keySrc, const DATA& dataSrc ) // ================================================================ { - OnDebug( Invalidate() ); + ERR err = JET_errSuccess; + + Assert( m_pb == NULL ); + + ASSERT_VALID( &keySrc ); + ASSERT_VALID( &dataSrc ); + + if ( keySrc.FNull() && dataSrc.FNull() ) + { + m_bm.Nullify(); + goto HandleError; + } + + // Make sure we are able to hold our key in RESBOOKMARK (never expected to fail). 
+ const DWORD_PTR cb = keySrc.Cb() + dataSrc.Cb(); + DWORD_PTR cbMax = 0; + CallS( RESBOOKMARK.ErrGetParam( JET_resoperSize, &cbMax ) ); + if ( cb > cbMax ) + { + Assert( fFalse ); + Error( ErrERRCheck( JET_errOutOfMemory ) ); + } + if ( m_pb == NULL ) { - return; + Call( ErrAllocBuffer() ); } - RESBOOKMARK.Free( m_pb ); - m_pb = NULL; + CopyKeyData( keySrc, dataSrc ); + +HandleError: + return err; +} + +// ================================================================ +INLINE VOID BOOKMARK_BUFFER::CopyInto( BOOKMARK_BUFFER* const pbmDest ) const +// ================================================================ +{ + pbmDest->CopyKeyData( m_bm.key, m_bm.data ); +} + +// ================================================================ +INLINE VOID BOOKMARK_BUFFER::NullifyAndSetPvsToBuffer() +// ================================================================ +{ + Assert( m_pb != NULL ); + ASSERT_VALID( &m_bm ); + + m_bm.Nullify(); + + m_bm.key.prefix.SetPv( m_pb ); + m_bm.key.suffix.SetPv( m_pb ); + m_bm.data.SetPv( m_pb ); +} + +// ================================================================ +INLINE const BOOKMARK& BOOKMARK_BUFFER::Bm() const +// ================================================================ +{ + return m_bm; +} + +// ================================================================ +INLINE BOOKMARK* BOOKMARK_BUFFER::Pbm() +// ================================================================ +{ + return &m_bm; } @@ -1880,7 +1993,7 @@ INLINE INT CmpKeyShortest( const KEY& key1, const KEY& key2 ) INT cbCompare = pkeySmallestPrefix->prefix.Cb(); INT cmp = 0; - if ( pb1 == pb2 || ( cmp = memcmp( pb1, pb2, cbCompare)) == 0 ) + if ( pb1 == pb2 || ( cmp = memcmp( pb1, pb2, cbCompare ) ) == 0 ) { pb1 = (BYTE *)pkeySmallestPrefix->suffix.Pv(); pb2 += cbCompare; diff --git a/dev/ese/src/inc/fmp.hxx b/dev/ese/src/inc/fmp.hxx index c7337f99..52135fd9 100644 --- a/dev/ese/src/inc/fmp.hxx +++ b/dev/ese/src/inc/fmp.hxx @@ -1213,6 +1213,12 @@ public: 
return ( m_pgnoShrinkTarget != pgnoNull ); } + PGNO PgnoShrinkTarget() const + { + Assert( FPgnoShrinkTargetIsSet() ); + return m_pgnoShrinkTarget; + } + BOOL FBeyondPgnoShrinkTarget( const PGNO pgnoFirst, const CPG cpg = 1 ) const { Assert( !FPgnoShrinkTargetIsSet() || m_fShrinkIsRunning ); diff --git a/dev/ese/src/inc/space.hxx b/dev/ese/src/inc/space.hxx index 80087b22..c6c32999 100644 --- a/dev/ese/src/inc/space.hxx +++ b/dev/ese/src/inc/space.hxx @@ -150,6 +150,7 @@ ERR ErrSPInitFCB( _Inout_ FUCB * const pfucb ); ERR ErrSPDeferredInitFCB( _Inout_ FUCB * const pfucb ); ERR ErrSPGetLastPgno( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ PGNO * ppgno ); ERR ErrSPGetLastExtent( _Inout_ PIB * ppib, _In_ const IFMP ifmp, _Out_ EXTENTINFO * pextinfo ); +ERR ErrSPGetOwningExtent( _In_ FUCB* pfucb, _In_ const PGNO pgno, _Out_ EXTENTINFO* pextinfo ); // Flags related to page or extent allocation. // @@ -299,7 +300,7 @@ typedef struct SpaceCatCtx FUCB* pfucbParent; FUCB* pfucb; FUCB* pfucbSpace; - BOOKMARK_COPY* pbm; + BOOKMARK_BUFFER* pbmb; } SpaceCatCtx; // Frees a SpaceCatCtx handle. diff --git a/dev/ese/src/noncore/interop/params.h b/dev/ese/src/noncore/interop/params.h index 25ce8a68..b610c549 100644 --- a/dev/ese/src/noncore/interop/params.h +++ b/dev/ese/src/noncore/interop/params.h @@ -127,7 +127,7 @@ MSINTERNAL enum class MJET_PARAM AccessDeniedRetryPeriod = 53, // Number of milliseconds to retry when about to fail with AccessDenied EnableIndexChecking = 45, // Enable checking OS version for indexes (false by default). EnableIndexCleanup = 54, // Enable cleanup of out-of-date index entries (Windows 2003 through Windows 7); Does NLS version checking (Windows 2003 and later). - Flight_SmoothIoTestPermillage = 55, // The per mille of total (or one thousandths, or tenths of a percent) of IO should be made smooth. Ex(s): 995(/1000) = 99.5% smooth, 10(/1000) = 1%, etc. 0 = disabled. 
+ Flight_HierarchicalSpaceAllocFlagsEnabled = 55, // Whether we want to pass the space allocation flags along when requesting space from the parent of an object. ElasticWaypointLatency = 56, // Amount of extra elastic waypoint latency Flight_SynchronousLVCleanup = 57, // Perform synchronous cleanup (actual delete) of LVs instead of flag delete with cleanup happening later Flight_RBSRevertIOUrgentLevel = 58, // IO urgent level for reverting the databases using RBS. Used to decide how many outstanding I/Os will be allowed. @@ -243,7 +243,8 @@ MSINTERNAL enum class MJET_PARAM DeferredIndexPopulateRowsPerTransaction = 219, // Number of primary index rows to process in a single transaction when processing EnableBlockCacheDetach = 220, // Indicates that ESE Block Cache detach is enabled. This will allow a file cached by the ESE Block Cache to be detached on open. Flight_UseCngAes256Implementation = 221, // Whether to use the CNG based implementation (rather than CAPI based one) for AES256 encryption - MaxValueInvalid = 222, // This is not a valid parameter. It can change from release to release! + Flight_ContiguousExtentMoveShrinkEnabled = 222, // Whether we want to move contiguous pages to a contiguous destination extent during DB Shrink. + MaxValueInvalid = 223, // This is not a valid parameter. It can change from release to release! }; }