diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 1ecff79f2d200..5f56e3f272473 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -31,7 +31,11 @@ class TimeFrameGPU : public TimeFrame using typename TimeFrame::ROFOverlapTableN; using typename TimeFrame::ROFVertexLookupTableN; using typename TimeFrame::ROFMaskTableN; + using typename TimeFrame::TrackingTopologyN; using typename TimeFrame::TrackSeedN; + static constexpr int MaxTransitions = TrackingTopologyN::MaxTransitions; + static constexpr int MaxCells = TrackingTopologyN::MaxCells; + static constexpr int MaxStreams = MaxCells > NLayers ? MaxCells : NLayers; public: TimeFrameGPU() = default; @@ -43,7 +47,9 @@ class TimeFrameGPU : public TimeFrame void registerHostMemory(const int); void unregisterHostMemory(const int); void initialise(const TrackingParameters&, int maxLayers); + void initialise(const TrackingParameters&, int maxLayers, int iteration); void loadIndexTableUtils(); + void loadTrackingTopologies(); void loadTrackingFrameInfoDevice(const int); void createTrackingFrameInfoDeviceArray(); void loadUnsortedClustersDevice(const int); @@ -85,7 +91,7 @@ class TimeFrameGPU : public TimeFrame void createNeighboursLUTDevice(const int, const unsigned int); void createTrackITSExtDevice(const size_t); void downloadTrackITSExtDevice(); - void downloadCellsNeighboursDevice(std::vector>>&, const int); + void downloadCellsNeighboursDevice(std::vector>&, const int); void downloadNeighboursLUTDevice(bounded_vector&, const int); void downloadCellsDevice(); void downloadCellsLUTDevice(); @@ -109,6 +115,7 @@ class TimeFrameGPU : public TimeFrame const auto getDeviceROFOverlapTableView() { return mDeviceROFOverlapTableView; } const auto getDeviceROFVertexLookupTableView() { return mDeviceROFVertexLookupTableView; } const auto getDeviceROFMaskTableView() { return mDeviceROFMaskTableView; } + const auto getDeviceTrackingTopologyView() const { return mDeviceTrackingTopologyView; } int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; } auto& getTrackITSExt() { return mTrackITSExt; } Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; } @@ -120,10 +127,9 @@ class TimeFrameGPU : public TimeFrame TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; } int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; } gsl::span getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; } - gpuPair* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; } - std::array& getDeviceNeighboursAll() { return mNeighboursDevice; } - int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; } - int** getDeviceNeighboursArray() { return mNeighboursDevice.data(); } + CellNeighbour** getDeviceArrayNeighbours() { return mNeighboursDeviceArray; } + std::array& getDeviceNeighboursAll() { return mNeighboursDevice; } + CellNeighbour* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; } TrackingFrameInfo* getDeviceTrackingFrameInfo(const int); const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; } const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; } @@ -147,10 +153,10 @@ class TimeFrameGPU : public TimeFrame void setDevicePropagator(const o2::base::PropagatorImpl* p) final { this->mPropagatorDevice = p; } // Host-specific getters - gsl::span getNTracklets() { return mNTracklets; } - gsl::span getNCells() { return mNCells; } + gsl::span getNTracklets() { return {mNTracklets.data(), static_cast::size_type>(this->mTrackingTopologyView.nTransitions)}; } + gsl::span getNCells() { return {mNCells.data(), static_cast::size_type>(this->mTrackingTopologyView.nCells)}; } auto& getArrayNCells() { return mNCells; } - gsl::span getNNeighbours() { return mNNeighbours; } + gsl::span getNNeighbours() { return {mNNeighbours.data(), static_cast::size_type>(this->mTrackingTopologyView.nCells)}; } auto& getArrayNNeighbours() { return mNNeighbours; } // Host-available device getters @@ -169,9 +175,9 @@ class TimeFrameGPU : public TimeFrame void allocMem(void**, size_t, bool, int32_t = o2::gpu::GPUMemoryResource::MEMORY_GPU); // Abstract owned and unowned memory allocations on default stream // Host-available device buffer sizes - std::array mNTracklets; - std::array mNCells; - std::array mNNeighbours; + std::array mNTracklets{}; + std::array mNCells{}; + std::array mNNeighbours{}; // Device pointers IndexTableUtilsN* mIndexTableUtilsDevice; @@ -179,6 +185,8 @@ class TimeFrameGPU : public TimeFrame ROFOverlapTableN::View mDeviceROFOverlapTableView; ROFVertexLookupTableN::View mDeviceROFVertexLookupTableView; ROFMaskTableN::View mDeviceROFMaskTableView; + std::vector mDeviceTrackerTopologyViews; + typename TrackingTopologyN::View mDeviceTrackingTopologyView; // Hybrid pref Vertex* mPrimaryVerticesDevice; @@ -193,30 +201,29 @@ class TimeFrameGPU : public TimeFrame const int** mClustersIndexTablesDeviceArray; uint8_t** mUsedClustersDeviceArray; const int** mROFramesClustersDeviceArray; - std::array mTrackletsDevice; - std::array mTrackletsLUTDevice; - std::array mCellsLUTDevice; - std::array mNeighboursLUTDevice; + std::array mTrackletsDevice{}; + std::array mTrackletsLUTDevice{}; + std::array mCellsLUTDevice{}; + std::array mNeighboursLUTDevice{}; Tracklet** mTrackletsDeviceArray{nullptr}; int** mCellsLUTDeviceArray{nullptr}; - int** mNeighboursCellDeviceArray{nullptr}; int** mNeighboursCellLUTDeviceArray{nullptr}; int** mTrackletsLUTDeviceArray{nullptr}; - std::array mCellsDevice; + std::array mCellsDevice{}; CellSeed** mCellsDeviceArray; - std::array mNeighboursIndexTablesDevice; + std::array mNeighboursIndexTablesDevice{}; TrackSeedN* mTrackSeedsDevice{nullptr}; int* mTrackSeedsLUTDevice{nullptr}; unsigned int mNTracks{0}; - std::array mCellSeedsDevice; + std::array mCellSeedsDevice{}; o2::track::TrackParCovF** mCellSeedsDeviceArray; - std::array mCellSeedsChi2Device; + std::array mCellSeedsChi2Device{}; float** mCellSeedsChi2DeviceArray; TrackITSExt* mTrackITSExtDevice; - std::array*, NLayers - 2> mNeighbourPairsDevice; - std::array mNeighboursDevice; + std::array mNeighboursDevice{}; + CellNeighbour** mNeighboursDeviceArray{nullptr}; std::array mTrackingFrameInfoDevice; const TrackingFrameInfo** mTrackingFrameInfoDeviceArray; @@ -245,19 +252,19 @@ inline std::vector TimeFrameGPU::getClusterSizes() template inline size_t TimeFrameGPU::getNumberOfTracklets() const { - return std::accumulate(mNTracklets.begin(), mNTracklets.end(), 0); + return std::accumulate(mNTracklets.begin(), mNTracklets.begin() + this->mTrackingTopologyView.nTransitions, 0); } template inline size_t TimeFrameGPU::getNumberOfCells() const { - return std::accumulate(mNCells.begin(), mNCells.end(), 0); + return std::accumulate(mNCells.begin(), mNCells.begin() + this->mTrackingTopologyView.nCells, 0); } template inline size_t TimeFrameGPU::getNumberOfNeighbours() const { - return std::accumulate(mNNeighbours.begin(), mNNeighbours.end(), 0); + return std::accumulate(mNNeighbours.begin(), mNNeighbours.begin() + this->mTrackingTopologyView.nCells, 0); } } // namespace o2::its::gpu diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index fe272f6f8d3bb..161283db2a2bc 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -17,14 +17,14 @@ #include "ITStracking/BoundedAllocator.h" #include "ITStracking/ROFLookupTables.h" -#include "ITStracking/Definitions.h" +#include "ITStracking/TrackingTopology.h" #include "ITStrackingGPU/Utils.h" #include "DetectorsBase/Propagator.h" -#include "GPUCommonDef.h" namespace o2::its { class CellSeed; +struct CellNeighbour; template class TrackSeed; class TrackingFrameInfo; @@ -38,7 +38,9 @@ class ExternalAllocator; template void countTrackletsInROFsHandler(const IndexTableUtils* utils, const typename ROFMaskTable::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const typename ROFOverlapTable::View& rofOverlaps, const typename ROFVertexLookupTable::View& vertexLUT, const int vertexId, @@ -53,20 +55,23 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const typename TrackingTopology::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minR, std::array& maxR, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); template void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const typename ROFMaskTable::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const typename ROFOverlapTable::View& rofOverlaps, const typename ROFVertexLookupTable::View& vertexLUT, const int vertexId, @@ -84,13 +89,14 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const typename TrackingTopology::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minR, std::array& maxR, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); @@ -101,7 +107,8 @@ void countCellsHandler(const Cluster** sortedClusters, Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const typename TrackingTopology::View topology, CellSeed* cells, int** cellsLUTsDeviceArray, int* cellsLUTsHost, @@ -120,7 +127,8 @@ void computeCellsHandler(const Cluster** sortedClusters, Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const typename TrackingTopology::View topology, CellSeed* cells, int** cellsLUTsDeviceArray, int* cellsLUTsHost, @@ -133,33 +141,31 @@ void computeCellsHandler(const Cluster** sortedClusters, template void countCellNeighboursHandler(CellSeed** cellsLayersDevice, - int* neighboursLUTs, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - o2::its::ExternalAllocator* alloc, gpu::Stream& stream); +void scanCellNeighboursHandler(int* neighboursCursor, + int* neighboursLUT, + const unsigned int nCells, + o2::its::ExternalAllocator* alloc, + gpu::Stream& stream); + template void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, - int* neighboursLUTs, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + CellNeighbour* cellNeighbours, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, gpu::Stream& stream); int filterCellNeighboursHandler(gpuPair*, @@ -169,19 +175,24 @@ int filterCellNeighboursHandler(gpuPair*, o2::its::ExternalAllocator* = nullptr); template -void processNeighboursHandler(const int startLayer, - const int startLevel, +void processNeighboursHandler(const int startLevel, + const int defaultCellTopologyId, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - std::array& nCells, + const int* currentCellTopologyIds, + const int* currentCellIds, + const int* nCells, const unsigned char** usedClusters, - std::array& neighbours, - gsl::span neighboursDeviceLUTs, + CellNeighbour** neighbours, + int** neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, bounded_vector>& seedsHost, const float bz, const float MaxChi2ClusterAttachment, const float maxChi2NDF, + const int maxHoles, + const int minTrackLength, + const LayerMask holeLayerMask, const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index b541518a88119..5fff30f5162b1 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -328,6 +328,37 @@ void TimeFrameGPU::loadROFVertexLookupTable() } } +template +void TimeFrameGPU::loadTrackingTopologies() +{ + GPUTimer timer("initialising device views of TrackingTopology"); + const auto& hostTopologies = this->getTrackerTopologies(); + mDeviceTrackerTopologyViews.resize(hostTopologies.size()); + using LayerTransition = typename TrackingTopologyN::LayerTransition; + using CellTopology = typename TrackingTopologyN::CellTopology; + using Range = typename TrackingTopologyN::Range; + using Id = typename TrackingTopologyN::Id; + for (size_t iteration = 0; iteration < hostTopologies.size(); ++iteration) { + const auto& topology = hostTopologies[iteration]; + LayerTransition* dTransitions{nullptr}; + CellTopology* dCells{nullptr}; + Range* dCellsByFirstTransitionIndex{nullptr}; + Id* dCellsByFirstTransition{nullptr}; + allocMem(reinterpret_cast(&dTransitions), topology.getNTransitions() * sizeof(LayerTransition), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&dCells), topology.getNCells() * sizeof(CellTopology), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&dCellsByFirstTransitionIndex), topology.getNTransitions() * sizeof(Range), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&dCellsByFirstTransition), topology.getNCellsByFirstTransition() * sizeof(Id), this->hasFrameworkAllocator()); + GPUChkErrS(cudaMemcpy(dTransitions, topology.getTransitions().data(), topology.getNTransitions() * sizeof(LayerTransition), cudaMemcpyHostToDevice)); + GPUChkErrS(cudaMemcpy(dCells, topology.getCells().data(), topology.getNCells() * sizeof(CellTopology), cudaMemcpyHostToDevice)); + GPUChkErrS(cudaMemcpy(dCellsByFirstTransitionIndex, topology.getCellsByFirstTransitionIndex().data(), topology.getNTransitions() * sizeof(Range), cudaMemcpyHostToDevice)); + GPUChkErrS(cudaMemcpy(dCellsByFirstTransition, topology.getCellsByFirstTransition().data(), topology.getNCellsByFirstTransition() * sizeof(Id), cudaMemcpyHostToDevice)); + mDeviceTrackerTopologyViews[iteration] = topology.getDeviceView(dTransitions, dCells, dCellsByFirstTransitionIndex, dCellsByFirstTransition); + } + if (!mDeviceTrackerTopologyViews.empty()) { + mDeviceTrackingTopologyView = mDeviceTrackerTopologyViews.front(); + } +} + template void TimeFrameGPU::updateROFVertexLookupTable() { @@ -348,7 +379,7 @@ template void TimeFrameGPU::createTrackletsLUTDeviceArray() { { - allocMem(reinterpret_cast(&mTrackletsLUTDeviceArray), (NLayers - 1) * sizeof(int*), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&mTrackletsLUTDeviceArray), MaxTransitions * sizeof(int*), this->hasFrameworkAllocator()); } } @@ -356,8 +387,9 @@ template void TimeFrameGPU::createTrackletsLUTDevice(bool allocate, const int layer) { GPUTimer timer(mGpuStreams[layer], "creating tracklets LUTs", layer); - const int ncls = this->mClusters[layer].size() + 1; - if (allocate) { + const int fromLayer = this->mTrackingTopologyView.getTransition(layer).fromLayer; + const int ncls = this->mClusters[fromLayer].size() + 1; + if (allocate || mTrackletsLUTDevice[layer] == nullptr) { GPULog("gpu-allocation: creating tracklets LUT for {} elements on layer {}, for {:.2f} MB.", ncls, layer, ncls * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mTrackletsLUTDevice[layer]), ncls * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator()); GPUChkErrS(cudaMemcpyAsync(&mTrackletsLUTDeviceArray[layer], &mTrackletsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); @@ -370,7 +402,7 @@ void TimeFrameGPU::createTrackletsBuffersArray() { { GPUTimer timer("creating tracklet buffers array"); - allocMem(reinterpret_cast(&mTrackletsDeviceArray), (NLayers - 1) * sizeof(Tracklet*), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&mTrackletsDeviceArray), MaxTransitions * sizeof(Tracklet*), this->hasFrameworkAllocator()); } } @@ -379,7 +411,8 @@ void TimeFrameGPU::createTrackletsBuffers(const int layer) { GPUTimer timer(mGpuStreams[layer], "creating tracklet buffers", layer); mNTracklets[layer] = 0; - GPUChkErrS(cudaMemcpyAsync(&mNTracklets[layer], mTrackletsLUTDevice[layer] + this->mClusters[layer].size(), sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); + const int fromLayer = this->mTrackingTopologyView.getTransition(layer).fromLayer; + GPUChkErrS(cudaMemcpyAsync(&mNTracklets[layer], mTrackletsLUTDevice[layer] + this->mClusters[fromLayer].size(), sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of tracklets is correct GPULog("gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {:.2f} MB.", mNTracklets[layer], layer, mNTracklets[layer] * sizeof(Tracklet) / constants::MB); allocMemAsync(reinterpret_cast(&mTrackletsDevice[layer]), mNTracklets[layer] * sizeof(Tracklet), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); @@ -426,6 +459,7 @@ void TimeFrameGPU::createNeighboursLUTDevice(const int layer, const uns GPULog("gpu-allocation: reserving neighbours LUT for {} elements on layer {} , for {:.2f} MB.", nCells + 1, layer, (nCells + 1) * sizeof(int) / constants::MB); allocMemAsync(reinterpret_cast(&mNeighboursLUTDevice[layer]), (nCells + 1) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); // We need one element more to move exc -> inc GPUChkErrS(cudaMemsetAsync(mNeighboursLUTDevice[layer], 0, (nCells + 1) * sizeof(int), mGpuStreams[layer].get())); + GPUChkErrS(cudaMemcpyAsync(&mNeighboursCellLUTDeviceArray[layer], &mNeighboursLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } template @@ -446,7 +480,9 @@ void TimeFrameGPU::createCellsLUTDeviceArray() { { GPUTimer timer("creating cells LUTs array"); - allocMem(reinterpret_cast(&mCellsLUTDeviceArray), (NLayers - 2) * sizeof(int*), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&mCellsLUTDeviceArray), MaxCells * sizeof(int*), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&mNeighboursCellLUTDeviceArray), MaxCells * sizeof(int*), this->hasFrameworkAllocator()); + GPUChkErrS(cudaMemset(mNeighboursCellLUTDeviceArray, 0, MaxCells * sizeof(int*))); } } @@ -454,9 +490,10 @@ template void TimeFrameGPU::createCellsLUTDevice(const int layer) { GPUTimer timer(mGpuStreams[layer], "creating cells LUTs", layer); - GPULog("gpu-transfer: creating cell LUT for {} elements on layer {}, for {:.2f} MB.", mNTracklets[layer] + 1, layer, (mNTracklets[layer] + 1) * sizeof(int) / constants::MB); - allocMemAsync(reinterpret_cast(&mCellsLUTDevice[layer]), (mNTracklets[layer] + 1) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); - GPUChkErrS(cudaMemsetAsync(mCellsLUTDevice[layer], 0, (mNTracklets[layer] + 1) * sizeof(int), mGpuStreams[layer].get())); + const int firstTransition = this->mTrackingTopologyView.getCell(layer).firstTransition; + GPULog("gpu-transfer: creating cell LUT for {} elements on layer {}, for {:.2f} MB.", mNTracklets[firstTransition] + 1, layer, (mNTracklets[firstTransition] + 1) * sizeof(int) / constants::MB); + allocMemAsync(reinterpret_cast(&mCellsLUTDevice[layer]), (mNTracklets[firstTransition] + 1) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); + GPUChkErrS(cudaMemsetAsync(mCellsLUTDevice[layer], 0, (mNTracklets[firstTransition] + 1) * sizeof(int), mGpuStreams[layer].get())); GPUChkErrS(cudaMemcpyAsync(&mCellsLUTDeviceArray[layer], &mCellsLUTDevice[layer], sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } @@ -465,7 +502,9 @@ void TimeFrameGPU::createCellsBuffersArray() { { GPUTimer timer("creating cells buffers array"); - allocMem(reinterpret_cast(&mCellsDeviceArray), (NLayers - 2) * sizeof(CellSeed*), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&mCellsDeviceArray), MaxCells * sizeof(CellSeed*), this->hasFrameworkAllocator()); + allocMem(reinterpret_cast(&mNeighboursDeviceArray), MaxCells * sizeof(CellNeighbour*), this->hasFrameworkAllocator()); + GPUChkErrS(cudaMemset(mNeighboursDeviceArray, 0, MaxCells * sizeof(CellNeighbour*))); GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice)); } } @@ -475,7 +514,8 @@ void TimeFrameGPU::createCellsBuffers(const int layer) { GPUTimer timer(mGpuStreams[layer], "creating cells buffers"); mNCells[layer] = 0; - GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); + const int firstTransition = this->mTrackingTopologyView.getCell(layer).firstTransition; + GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[firstTransition], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of cells is correct GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB); allocMemAsync(reinterpret_cast(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); @@ -511,13 +551,22 @@ void TimeFrameGPU::createNeighboursDevice(const unsigned int layer) { GPUTimer timer(mGpuStreams[layer], "reserving neighbours", layer); this->mNNeighbours[layer] = 0; - GPUChkErrS(cudaMemcpyAsync(&(this->mNNeighbours[layer]), &(mNeighboursLUTDevice[layer][this->mNCells[layer + 1] - 1]), sizeof(unsigned int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); + if (this->mNCells[layer] == 0) { + mNeighboursDevice[layer] = nullptr; + GPUChkErrS(cudaMemcpyAsync(&mNeighboursDeviceArray[layer], &mNeighboursDevice[layer], sizeof(CellNeighbour*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); + return; + } + GPUChkErrS(cudaMemcpyAsync(&(this->mNNeighbours[layer]), &(mNeighboursLUTDevice[layer][this->mNCells[layer]]), sizeof(unsigned int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); mGpuStreams[layer].sync(); // ensure number of neighbours is correct - GPULog("gpu-allocation: reserving {} neighbours (pairs), for {:.2f} MB.", this->mNNeighbours[layer], (this->mNNeighbours[layer]) * sizeof(gpuPair) / constants::MB); - allocMemAsync(reinterpret_cast(&mNeighbourPairsDevice[layer]), (this->mNNeighbours[layer]) * sizeof(gpuPair), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); - GPUChkErrS(cudaMemsetAsync(mNeighbourPairsDevice[layer], -1, (this->mNNeighbours[layer]) * sizeof(gpuPair), mGpuStreams[layer].get())); - GPULog("gpu-allocation: reserving {} neighbours, for {:.2f} MB.", this->mNNeighbours[layer], (this->mNNeighbours[layer]) * sizeof(gpuPair) / constants::MB); - allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), (this->mNNeighbours[layer]) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); + if (this->mNNeighbours[layer] == 0) { + mNeighboursDevice[layer] = nullptr; + GPUChkErrS(cudaMemcpyAsync(&mNeighboursDeviceArray[layer], &mNeighboursDevice[layer], sizeof(CellNeighbour*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); + return; + } + GPULog("gpu-allocation: reserving {} neighbours, for {:.2f} MB.", this->mNNeighbours[layer], (this->mNNeighbours[layer]) * sizeof(CellNeighbour) / constants::MB); + allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), (this->mNNeighbours[layer]) * sizeof(CellNeighbour), mGpuStreams[layer], this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK)); + GPUChkErrS(cudaMemsetAsync(mNeighboursDevice[layer], -1, (this->mNNeighbours[layer]) * sizeof(CellNeighbour), mGpuStreams[layer].get())); + GPUChkErrS(cudaMemcpyAsync(&mNeighboursDeviceArray[layer], &mNeighboursDevice[layer], sizeof(CellNeighbour*), cudaMemcpyHostToDevice, mGpuStreams[layer].get())); } template @@ -555,11 +604,11 @@ void TimeFrameGPU::downloadCellsLUTDevice() } template -void TimeFrameGPU::downloadCellsNeighboursDevice(std::vector>>& neighbours, const int layer) +void TimeFrameGPU::downloadCellsNeighboursDevice(std::vector>& neighbours, const int layer) { GPUTimer timer(mGpuStreams[layer], "downloading neighbours from layer", layer); - GPULog("gpu-transfer: downloading {} neighbours, for {:.2f} MB.", neighbours[layer].size(), neighbours[layer].size() * sizeof(std::pair) / constants::MB); - GPUChkErrS(cudaMemcpyAsync(neighbours[layer].data(), mNeighbourPairsDevice[layer], neighbours[layer].size() * sizeof(gpuPair), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); + GPULog("gpu-transfer: downloading {} neighbours, for {:.2f} MB.", neighbours[layer].size(), neighbours[layer].size() * sizeof(CellNeighbour) / constants::MB); + GPUChkErrS(cudaMemcpyAsync(neighbours[layer].data(), mNeighboursDevice[layer], neighbours[layer].size() * sizeof(CellNeighbour), cudaMemcpyDeviceToHost, mGpuStreams[layer].get())); } template @@ -648,10 +697,20 @@ void TimeFrameGPU::popMemoryStack(const int iteration) template void TimeFrameGPU::initialise(const TrackingParameters& trkParam, int maxLayers) { - mGpuStreams.resize(NLayers); + mGpuStreams.resize(MaxStreams); o2::its::TimeFrame::initialise(trkParam, maxLayers); } +template +void TimeFrameGPU::initialise(const TrackingParameters& trkParam, int maxLayers, int iteration) +{ + mGpuStreams.resize(MaxStreams); + o2::its::TimeFrame::initialise(trkParam, maxLayers, iteration); + if (iteration != constants::UnusedIndex && iteration < static_cast(mDeviceTrackerTopologyViews.size())) { + mDeviceTrackingTopologyView = mDeviceTrackerTopologyViews[iteration]; + } +} + template void TimeFrameGPU::syncStream(const size_t stream) { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 2d2ca5432cdf9..8e19f925ba3eb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -22,7 +22,7 @@ namespace o2::its template void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) { - mTimeFrameGPU->initialise(this->mTrkParams[iteration], NLayers); + mTimeFrameGPU->initialise(this->mTrkParams[iteration], NLayers, iteration); if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { // on default stream @@ -30,6 +30,7 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) // TODO these tables can be put in persistent memory mTimeFrameGPU->loadROFOverlapTable(); // this can be put in constant memory actually mTimeFrameGPU->loadROFVertexLookupTable(); + mTimeFrameGPU->loadTrackingTopologies(); // once the tables are in persistent memory just update the vertex one // mTimeFrameGPU->updateROFVertexLookupTable(); mTimeFrameGPU->loadIndexTableUtils(); @@ -63,8 +64,9 @@ void TrackerTraitsGPU::adoptTimeFrame(TimeFrame* tf) template void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int iVertex) { - // start by queuing loading needed of two last layers - for (int iLayer{NLayers}; iLayer-- > NLayers - 2;) { + const auto topology = mTimeFrameGPU->getDeviceTrackingTopologyView(); + const auto hostTopology = mTimeFrameGPU->getTrackingTopologyView(); + for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NLayers; ++iLayer) { if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { mTimeFrameGPU->createUsedClustersDevice(iLayer); mTimeFrameGPU->loadClustersDevice(iLayer); @@ -74,21 +76,16 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->recordEvent(iLayer); } - for (int iLayer{this->mTrkParams[iteration].TrackletsPerRoad()}; iLayer--;) { - if (iLayer) { // queue loading data of next layer in parallel, this the copies are overlapping with computation kernels - if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { - mTimeFrameGPU->createUsedClustersDevice(iLayer - 1); - mTimeFrameGPU->loadClustersDevice(iLayer - 1); - mTimeFrameGPU->loadClustersIndexTables(iLayer - 1); - mTimeFrameGPU->loadROFrameClustersDevice(iLayer - 1); - } - mTimeFrameGPU->recordEvent(iLayer - 1); - } - mTimeFrameGPU->createTrackletsLUTDevice(this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass], iLayer); - mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available + for (int transitionId{0}; transitionId < hostTopology.nTransitions; ++transitionId) { + const auto transition = hostTopology.getTransition(transitionId); + mTimeFrameGPU->createTrackletsLUTDevice(this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass], transitionId); + mTimeFrameGPU->waitEvent(transitionId, transition.fromLayer); + mTimeFrameGPU->waitEvent(transitionId, transition.toLayer); countTrackletsInROFsHandler(mTimeFrameGPU->getDeviceIndexTableUtils(), mTimeFrameGPU->getDeviceROFMaskTableView(), - iLayer, + transitionId, + transition.fromLayer, + transition.toLayer, mTimeFrameGPU->getDeviceROFOverlapTableView(), mTimeFrameGPU->getDeviceROFVertexLookupTableView(), iVertex, @@ -103,22 +100,26 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->getDeviceTrackletsLUTs(), this->mTrkParams[iteration].PassFlags[IterationStep::SelectUPCVertices], this->mTrkParams[iteration].NSigmaCut, - mTimeFrameGPU->getPhiCuts(), + topology, + mTimeFrameGPU->getTransitionPhiCuts(), this->mTrkParams[iteration].PVres, mTimeFrameGPU->getMinRs(), mTimeFrameGPU->getMaxRs(), mTimeFrameGPU->getPositionResolutions(), this->mTrkParams[iteration].LayerRadii, - mTimeFrameGPU->getMSangles(), + mTimeFrameGPU->getTransitionMSAngles(), mTimeFrameGPU->getFrameworkAllocator(), mTimeFrameGPU->getStreams()); - mTimeFrameGPU->createTrackletsBuffers(iLayer); - if (mTimeFrameGPU->getNTracklets()[iLayer] == 0) { + mTimeFrameGPU->createTrackletsBuffers(transitionId); + if (mTimeFrameGPU->getNTracklets()[transitionId] == 0) { + mTimeFrameGPU->recordEvent(transitionId); continue; } computeTrackletsInROFsHandler(mTimeFrameGPU->getDeviceIndexTableUtils(), mTimeFrameGPU->getDeviceROFMaskTableView(), - iLayer, + transitionId, + transition.fromLayer, + transition.toLayer, mTimeFrameGPU->getDeviceROFOverlapTableView(), mTimeFrameGPU->getDeviceROFVertexLookupTableView(), iVertex, @@ -136,23 +137,26 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->getDeviceTrackletsLUTs(), this->mTrkParams[iteration].PassFlags[IterationStep::SelectUPCVertices], this->mTrkParams[iteration].NSigmaCut, - mTimeFrameGPU->getPhiCuts(), + topology, + mTimeFrameGPU->getTransitionPhiCuts(), this->mTrkParams[iteration].PVres, mTimeFrameGPU->getMinRs(), mTimeFrameGPU->getMaxRs(), mTimeFrameGPU->getPositionResolutions(), this->mTrkParams[iteration].LayerRadii, - mTimeFrameGPU->getMSangles(), + mTimeFrameGPU->getTransitionMSAngles(), mTimeFrameGPU->getFrameworkAllocator(), mTimeFrameGPU->getStreams()); + mTimeFrameGPU->recordEvent(transitionId); } } template void TrackerTraitsGPU::computeLayerCells(const int iteration) { - // start by queuing loading needed of three last layers - for (int iLayer{NLayers}; iLayer-- > NLayers - 3;) { + const auto topology = mTimeFrameGPU->getDeviceTrackingTopologyView(); + const auto hostTopology = mTimeFrameGPU->getTrackingTopologyView(); + for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NLayers; ++iLayer) { if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { mTimeFrameGPU->loadUnsortedClustersDevice(iLayer); mTimeFrameGPU->loadTrackingFrameInfoDevice(iLayer); @@ -160,35 +164,33 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) mTimeFrameGPU->recordEvent(iLayer); } - for (int iLayer{this->mTrkParams[iteration].CellsPerRoad()}; iLayer--;) { - if (iLayer) { - if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { - mTimeFrameGPU->loadUnsortedClustersDevice(iLayer - 1); - mTimeFrameGPU->loadTrackingFrameInfoDevice(iLayer - 1); - } - mTimeFrameGPU->recordEvent(iLayer - 1); - } - - // if there are no tracklets skip entirely - const int currentLayerTrackletsNum{static_cast(mTimeFrameGPU->getNTracklets()[iLayer])}; - if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[iLayer + 1]) { - mTimeFrameGPU->getNCells()[iLayer] = 0; + for (int cellTopologyId{hostTopology.nCells}; cellTopologyId--;) { + const auto cellTopology = hostTopology.getCell(cellTopologyId); + const auto first = hostTopology.getTransition(cellTopology.firstTransition); + const auto second = hostTopology.getTransition(cellTopology.secondTransition); + const int currentLayerTrackletsNum{static_cast(mTimeFrameGPU->getNTracklets()[cellTopology.firstTransition])}; + if (!currentLayerTrackletsNum || !mTimeFrameGPU->getNTracklets()[cellTopology.secondTransition]) { + mTimeFrameGPU->getNCells()[cellTopologyId] = 0; continue; } - mTimeFrameGPU->createCellsLUTDevice(iLayer); - mTimeFrameGPU->waitEvent(iLayer, iLayer + 1); // wait stream until all data is available - mTimeFrameGPU->waitEvent(iLayer, iLayer + 2); // wait stream until all data is available + mTimeFrameGPU->createCellsLUTDevice(cellTopologyId); + mTimeFrameGPU->waitEvent(cellTopologyId, cellTopology.firstTransition); + mTimeFrameGPU->waitEvent(cellTopologyId, cellTopology.secondTransition); + mTimeFrameGPU->waitEvent(cellTopologyId, first.fromLayer); + mTimeFrameGPU->waitEvent(cellTopologyId, first.toLayer); + mTimeFrameGPU->waitEvent(cellTopologyId, second.toLayer); countCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), mTimeFrameGPU->getDeviceArrayUnsortedClusters(), mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), mTimeFrameGPU->getDeviceArrayTracklets(), mTimeFrameGPU->getDeviceArrayTrackletsLUT(), currentLayerTrackletsNum, - iLayer, + cellTopologyId, + topology, nullptr, mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceCellLUTs()[iLayer], + mTimeFrameGPU->getDeviceCellLUTs()[cellTopologyId], this->mBz, this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, @@ -196,8 +198,9 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) this->mTrkParams[iteration].LayerxX0, mTimeFrameGPU->getFrameworkAllocator(), mTimeFrameGPU->getStreams()); - mTimeFrameGPU->createCellsBuffers(iLayer); - if (mTimeFrameGPU->getNCells()[iLayer] == 0) { + mTimeFrameGPU->createCellsBuffers(cellTopologyId); + if (mTimeFrameGPU->getNCells()[cellTopologyId] == 0) { + mTimeFrameGPU->recordEvent(cellTopologyId); continue; } computeCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(), @@ -206,16 +209,18 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) mTimeFrameGPU->getDeviceArrayTracklets(), mTimeFrameGPU->getDeviceArrayTrackletsLUT(), currentLayerTrackletsNum, - iLayer, - mTimeFrameGPU->getDeviceCells()[iLayer], + cellTopologyId, + topology, + mTimeFrameGPU->getDeviceCells()[cellTopologyId], mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceCellLUTs()[iLayer], + mTimeFrameGPU->getDeviceCellLUTs()[cellTopologyId], this->mBz, this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].CellDeltaTanLambdaSigma, this->mTrkParams[iteration].NSigmaCut, this->mTrkParams[iteration].LayerxX0, mTimeFrameGPU->getStreams()); + mTimeFrameGPU->recordEvent(cellTopologyId); } mTimeFrameGPU->syncStreams(false); } @@ -223,58 +228,71 @@ void TrackerTraitsGPU::computeLayerCells(const int iteration) template void TrackerTraitsGPU::findCellsNeighbours(const int iteration) { - for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) { - if (iLayer > 0) { - // Previous layer updates levels in this layer's cells. - mTimeFrameGPU->waitEvent(iLayer, iLayer - 1); - } - const int currentLayerCellsNum{static_cast(mTimeFrameGPU->getNCells()[iLayer])}; - const int nextLayerCellsNum{static_cast(mTimeFrameGPU->getNCells()[iLayer + 1])}; - if (!nextLayerCellsNum || !currentLayerCellsNum) { - mTimeFrameGPU->getNNeighbours()[iLayer] = 0; - mTimeFrameGPU->recordEvent(iLayer); - continue; - } - mTimeFrameGPU->createNeighboursIndexTablesDevice(iLayer); - mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum); - countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here. - mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), - mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), - (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(), - this->mTrkParams[iteration].MaxChi2ClusterAttachment, - this->mBz, - iLayer, - currentLayerCellsNum, - nextLayerCellsNum, - 1e2, - mTimeFrameGPU->getFrameworkAllocator(), - mTimeFrameGPU->getStream(iLayer)); - mTimeFrameGPU->createNeighboursDevice(iLayer); - if (mTimeFrameGPU->getNNeighbours()[iLayer] == 0) { - mTimeFrameGPU->recordEvent(iLayer); - continue; + const auto hostTopology = mTimeFrameGPU->getTrackingTopologyView(); + for (int outerLayer{0}; outerLayer < NLayers; ++outerLayer) { + for (int targetCellTopologyId{0}; targetCellTopologyId < hostTopology.nCells; ++targetCellTopologyId) { + const auto targetCellTopology = hostTopology.getCell(targetCellTopologyId); + if (targetCellTopology.hitLayerMask.last() != outerLayer) { + continue; + } + const int targetCellsNum{static_cast(mTimeFrameGPU->getNCells()[targetCellTopologyId])}; + if (!targetCellsNum) { + mTimeFrameGPU->getNNeighbours()[targetCellTopologyId] = 0; + mTimeFrameGPU->recordEvent(targetCellTopologyId); + continue; + } + mTimeFrameGPU->createNeighboursIndexTablesDevice(targetCellTopologyId); + mTimeFrameGPU->createNeighboursLUTDevice(targetCellTopologyId, targetCellsNum); + + for (int sourceCellTopologyId{0}; sourceCellTopologyId < hostTopology.nCells; ++sourceCellTopologyId) { + const auto sourceCellTopology = hostTopology.getCell(sourceCellTopologyId); + const int sourceCellsNum{static_cast(mTimeFrameGPU->getNCells()[sourceCellTopologyId])}; + if (!sourceCellsNum || sourceCellTopology.secondTransition != targetCellTopology.firstTransition) { + continue; + } + mTimeFrameGPU->waitEvent(targetCellTopologyId, sourceCellTopologyId); + countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), + mTimeFrameGPU->getDeviceNeighboursIndexTables(targetCellTopologyId), + mTimeFrameGPU->getDeviceArrayCellsLUT(), + sourceCellTopologyId, + targetCellTopologyId, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mBz, + sourceCellsNum, + mTimeFrameGPU->getStream(targetCellTopologyId)); + } + + scanCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighboursIndexTables(targetCellTopologyId), + mTimeFrameGPU->getDeviceNeighboursLUT(targetCellTopologyId), + targetCellsNum, + mTimeFrameGPU->getFrameworkAllocator(), + mTimeFrameGPU->getStream(targetCellTopologyId)); + + mTimeFrameGPU->createNeighboursDevice(targetCellTopologyId); + if (mTimeFrameGPU->getNNeighbours()[targetCellTopologyId] == 0) { + mTimeFrameGPU->recordEvent(targetCellTopologyId); + continue; + } + + for (int sourceCellTopologyId{0}; sourceCellTopologyId < hostTopology.nCells; ++sourceCellTopologyId) { + const auto sourceCellTopology = hostTopology.getCell(sourceCellTopologyId); + const int sourceCellsNum{static_cast(mTimeFrameGPU->getNCells()[sourceCellTopologyId])}; + if (!sourceCellsNum || sourceCellTopology.secondTransition != targetCellTopology.firstTransition) { + continue; + } + computeCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), + mTimeFrameGPU->getDeviceNeighboursIndexTables(targetCellTopologyId), + mTimeFrameGPU->getDeviceArrayCellsLUT(), + mTimeFrameGPU->getDeviceNeighbours(targetCellTopologyId), + sourceCellTopologyId, + targetCellTopologyId, + this->mTrkParams[iteration].MaxChi2ClusterAttachment, + this->mBz, + sourceCellsNum, + mTimeFrameGPU->getStream(targetCellTopologyId)); + } + mTimeFrameGPU->recordEvent(targetCellTopologyId); } - computeCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), - mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), - mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), - (const Tracklet**)mTimeFrameGPU->getDeviceArrayTracklets(), - this->mTrkParams[iteration].MaxChi2ClusterAttachment, - this->mBz, - iLayer, - currentLayerCellsNum, - nextLayerCellsNum, - 1e2, - mTimeFrameGPU->getStream(iLayer)); - mTimeFrameGPU->getArrayNNeighbours()[iLayer] = filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), - mTimeFrameGPU->getDeviceNeighbours(iLayer), - mTimeFrameGPU->getArrayNNeighbours()[iLayer], - mTimeFrameGPU->getStream(iLayer), - mTimeFrameGPU->getFrameworkAllocator()); - mTimeFrameGPU->recordEvent(iLayer); } mTimeFrameGPU->syncStreams(false); } @@ -286,26 +304,33 @@ void TrackerTraitsGPU::findRoads(const int iteration) bounded_vector> sharedFirstClusters(this->mTrkParams[iteration].NLayers, bounded_vector(this->getMemoryPool().get()), this->getMemoryPool().get()); firstClusters.resize(this->mTrkParams[iteration].NLayers); sharedFirstClusters.resize(this->mTrkParams[iteration].NLayers); + const auto hostTopology = mTimeFrameGPU->getTrackingTopologyView(); for (int startLevel{this->mTrkParams[iteration].CellsPerRoad()}; startLevel >= this->mTrkParams[iteration].CellMinimumLevel(); --startLevel) { - const int minimumLayer{startLevel - 1}; bounded_vector> trackSeeds(this->getMemoryPool().get()); - for (int startLayer{this->mTrkParams[iteration].CellsPerRoad() - 1}; startLayer >= minimumLayer; --startLayer) { - if ((this->mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { + for (int startCellTopologyId{0}; startCellTopologyId < hostTopology.nCells; ++startCellTopologyId) { + const int startLayer = hostTopology.getCell(startCellTopologyId).hitLayerMask.last(); + if ((this->mTrkParams[iteration].StartLayerMask & (1 << startLayer)) == 0 || + mTimeFrameGPU->getNCells()[startCellTopologyId] == 0) { continue; } - processNeighboursHandler(startLayer, - startLevel, + processNeighboursHandler(startLevel, + startCellTopologyId, mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceCells()[startLayer], - mTimeFrameGPU->getArrayNCells(), + mTimeFrameGPU->getDeviceCells()[startCellTopologyId], + nullptr, + nullptr, + mTimeFrameGPU->getArrayNCells().data(), (const uint8_t**)mTimeFrameGPU->getDeviceArrayUsedClusters(), - mTimeFrameGPU->getDeviceNeighboursAll(), - mTimeFrameGPU->getDeviceNeighboursLUTs(), + mTimeFrameGPU->getDeviceArrayNeighbours(), + mTimeFrameGPU->getDeviceArrayNeighboursCellLUT(), mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), trackSeeds, this->mBz, this->mTrkParams[iteration].MaxChi2ClusterAttachment, this->mTrkParams[iteration].MaxChi2NDF, + this->mTrkParams[iteration].MaxHoles, + this->mTrkParams[iteration].HoleLayerMask, + this->mTrkParams[iteration].MinTrackLength, this->mTrkParams[iteration].LayerxX0, mTimeFrameGPU->getDevicePropagator(), this->mTrkParams[iteration].CorrType, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index a732327a64d15..571afe08fc209 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -86,20 +86,25 @@ struct is_valid_pair { template struct seed_selector { - float maxQ2Pt; - float maxChi2; + float mMaxQ2Pt; + float mMaxChi2; + int mMaxHoles; + int mMinTrackLength; + LayerMask mHoleLayerMask; - GPUhd() seed_selector(float maxQ2Pt, float maxChi2) : maxQ2Pt(maxQ2Pt), maxChi2(maxChi2) {} + GPUhd() seed_selector(float maxQ2Pt, float maxChi2, int maxHoles, int minTrackLength, LayerMask holeLayerMask) : mMaxQ2Pt(maxQ2Pt), mMaxChi2(maxChi2), mMaxHoles(maxHoles), mMinTrackLength(minTrackLength), mHoleLayerMask(holeLayerMask) {} GPUhd() bool operator()(const TrackSeed& seed) const { - return !(seed.getQ2Pt() > maxQ2Pt || seed.getChi2() > maxChi2); + return !(seed.getQ2Pt() > mMaxQ2Pt || seed.getChi2() > mMaxChi2) && + seed.getHitLayerMask().length() >= mMinTrackLength && + seed.getHitLayerMask().isAllowed(mMaxHoles, mHoleLayerMask); } }; struct compare_track_chi2 { GPUhd() bool operator()(const TrackITSExt& a, const TrackITSExt& b) const { - return a.getChi2() < b.getChi2(); + return o2::its::track::isBetter(a, b); } }; @@ -160,30 +165,22 @@ GPUg() void __launch_bounds__(256, 1) fitTrackSeedsKernel( template GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel( CellSeed** cellSeedArray, - int* neighboursLUT, - int* neighboursIndexTable, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - const Tracklet** tracklets, + CellNeighbour* cellNeighbours, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, - const unsigned int nCells, - const int maxCellNeighbours = 1e2) + const unsigned int nCells) { for (int iCurrentCellIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentCellIndex < nCells; iCurrentCellIndex += blockDim.x * gridDim.x) { - if constexpr (!initRun) { - if (neighboursIndexTable[iCurrentCellIndex] == neighboursIndexTable[iCurrentCellIndex + 1]) { - continue; - } - } - const auto& currentCellSeed{cellSeedArray[layerIndex][iCurrentCellIndex]}; + const auto& currentCellSeed{cellSeedArray[sourceCellTopologyId][iCurrentCellIndex]}; const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()}; - const int nextLayerFirstCellIndex{cellsLUTs[layerIndex + 1][nextLayerTrackletIndex]}; - const int nextLayerLastCellIndex{cellsLUTs[layerIndex + 1][nextLayerTrackletIndex + 1]}; - int foundNeighbours{0}; + const int nextLayerFirstCellIndex{cellsLUTs[targetCellTopologyId][nextLayerTrackletIndex]}; + const int nextLayerLastCellIndex{cellsLUTs[targetCellTopologyId][nextLayerTrackletIndex + 1]}; for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) { - auto nextCellSeed{cellSeedArray[layerIndex + 1][iNextCell]}; // Copy + auto nextCellSeed{cellSeedArray[targetCellTopologyId][iNextCell]}; // Copy if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex || !currentCellSeed.getTimeStamp().isCompatible(nextCellSeed.getTimeStamp())) { break; } @@ -199,14 +196,13 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellNeighboursKernel( } if constexpr (initRun) { - atomicAdd(neighboursLUT + iNextCell, 1); - neighboursIndexTable[iCurrentCellIndex]++; + atomicAdd(neighboursCursor + iNextCell, 1); } else { - cellNeighbours[neighboursIndexTable[iCurrentCellIndex] + foundNeighbours] = {iCurrentCellIndex, iNextCell}; - foundNeighbours++; + const int offset = atomicAdd(neighboursCursor + iNextCell, 1); + cellNeighbours[offset] = {sourceCellTopologyId, iCurrentCellIndex, targetCellTopologyId, iNextCell, currentCellSeed.getLevel() + 1}; const int currentCellLevel{currentCellSeed.getLevel()}; if (currentCellLevel >= nextCellSeed.getLevel()) { - atomicMax(cellSeedArray[layerIndex + 1][iNextCell].getLevelPtr(), currentCellLevel + 1); + atomicMax(cellSeedArray[targetCellTopologyId][iNextCell].getLevelPtr(), currentCellLevel + 1); } } } @@ -221,7 +217,8 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( Tracklet** tracklets, int** trackletsLUT, const int nTrackletsCurrent, - const int layer, + const int cellTopologyId, + const typename TrackingTopology::View topology, CellSeed* cells, int** cellsLUTs, const float* layerxX0, @@ -230,25 +227,29 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( const float cellDeltaTanLambdaSigma, const float nSigmaCut) { + const auto cellTopology = topology.getCell(cellTopologyId); + const auto first = topology.getTransition(cellTopology.firstTransition); + const auto second = topology.getTransition(cellTopology.secondTransition); + const int layers[3] = {first.fromLayer, first.toLayer, second.toLayer}; for (int iCurrentTrackletIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentTrackletIndex < nTrackletsCurrent; iCurrentTrackletIndex += blockDim.x * gridDim.x) { if constexpr (!initRun) { - if (cellsLUTs[layer][iCurrentTrackletIndex] == cellsLUTs[layer][iCurrentTrackletIndex + 1]) { + if (cellsLUTs[cellTopologyId][iCurrentTrackletIndex] == cellsLUTs[cellTopologyId][iCurrentTrackletIndex + 1]) { continue; } } - const Tracklet& currentTracklet = tracklets[layer][iCurrentTrackletIndex]; + const Tracklet& currentTracklet = tracklets[cellTopology.firstTransition][iCurrentTrackletIndex]; const int nextLayerClusterIndex{currentTracklet.secondClusterIndex}; - const int nextLayerFirstTrackletIndex{trackletsLUT[layer + 1][nextLayerClusterIndex]}; - const int nextLayerLastTrackletIndex{trackletsLUT[layer + 1][nextLayerClusterIndex + 1]}; + const int nextLayerFirstTrackletIndex{trackletsLUT[cellTopology.secondTransition][nextLayerClusterIndex]}; + const int nextLayerLastTrackletIndex{trackletsLUT[cellTopology.secondTransition][nextLayerClusterIndex + 1]}; if (nextLayerFirstTrackletIndex == nextLayerLastTrackletIndex) { continue; } int foundCells{0}; for (int iNextTrackletIndex{nextLayerFirstTrackletIndex}; iNextTrackletIndex < nextLayerLastTrackletIndex; ++iNextTrackletIndex) { - if (tracklets[layer + 1][iNextTrackletIndex].firstClusterIndex != nextLayerClusterIndex) { + if (tracklets[cellTopology.secondTransition][iNextTrackletIndex].firstClusterIndex != nextLayerClusterIndex) { break; } - const Tracklet& nextTracklet = tracklets[layer + 1][iNextTrackletIndex]; + const Tracklet& nextTracklet = tracklets[cellTopology.secondTransition][iNextTrackletIndex]; if (!currentTracklet.getTimeStamp().isCompatible(nextTracklet.getTimeStamp())) { continue; } @@ -256,18 +257,18 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( if (deltaTanLambda / cellDeltaTanLambdaSigma < nSigmaCut) { const int clusId[3]{ - sortedClusters[layer][currentTracklet.firstClusterIndex].clusterId, - sortedClusters[layer + 1][nextTracklet.firstClusterIndex].clusterId, - sortedClusters[layer + 2][nextTracklet.secondClusterIndex].clusterId}; + sortedClusters[layers[0]][currentTracklet.firstClusterIndex].clusterId, + sortedClusters[layers[1]][nextTracklet.firstClusterIndex].clusterId, + sortedClusters[layers[2]][nextTracklet.secondClusterIndex].clusterId}; - const auto& cluster1_glo = unsortedClusters[layer][clusId[0]]; - const auto& cluster2_glo = unsortedClusters[layer + 1][clusId[1]]; - const auto& cluster3_tf = tfInfo[layer + 2][clusId[2]]; + const auto& cluster1_glo = unsortedClusters[layers[0]][clusId[0]]; + const auto& cluster2_glo = unsortedClusters[layers[1]][clusId[1]]; + const auto& cluster3_tf = tfInfo[layers[2]][clusId[2]]; auto track{o2::its::track::buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf, bz)}; float chi2{0.f}; bool good{false}; for (int iC{2}; iC--;) { - const TrackingFrameInfo& trackingHit = tfInfo[layer + iC][clusId[iC]]; + const TrackingFrameInfo& trackingHit = tfInfo[layers[iC]][clusId[iC]]; if (!track.rotate(trackingHit.alphaTrackingFrame)) { break; } @@ -275,7 +276,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( break; } - if (!track.correctForMaterial(layerxX0[layer + iC], layerxX0[layer + iC] * constants::Radl * constants::Rho, true)) { + if (!track.correctForMaterial(layerxX0[layers[iC]], layerxX0[layers[iC]] * constants::Radl * constants::Rho, true)) { break; } @@ -295,13 +296,13 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel( if constexpr (!initRun) { TimeEstBC ts = currentTracklet.getTimeStamp(); ts += nextTracklet.getTimeStamp(); - new (cells + cellsLUTs[layer][iCurrentTrackletIndex] + foundCells) CellSeed{layer, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2, ts}; + new (cells + cellsLUTs[cellTopologyId][iCurrentTrackletIndex] + foundCells) CellSeed{cellTopology.hitLayerMask, clusId[0], clusId[1], clusId[2], iCurrentTrackletIndex, iNextTrackletIndex, track, chi2, ts}; } ++foundCells; } } if constexpr (initRun) { - cellsLUTs[layer][iCurrentTrackletIndex] = foundCells; + cellsLUTs[cellTopologyId][iCurrentTrackletIndex] = foundCells; } } } @@ -310,7 +311,8 @@ template GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( const IndexTableUtils* utils, const typename ROFMaskTable::View rofMask, - const int layerIndex, + const int transitionId, + const typename TrackingTopology::View topology, const typename ROFOverlapTable::View rofOverlaps, const typename ROFVertexLookupTable::View vertexLUT, const Vertex* vertices, @@ -332,17 +334,20 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( const float meanDeltaR, const float MSAngle) { + const auto transition = topology.getTransition(transitionId); + const int fromLayer = transition.fromLayer; + const int toLayer = transition.toLayer; const int phiBins{utils->getNphiBins()}; const int zBins{utils->getNzBins()}; const int tableSize{phiBins * zBins + 1}; - const int totalROFs0 = rofOverlaps.getLayer(layerIndex).mNROFsTF; - const int totalROFs1 = rofOverlaps.getLayer(layerIndex + 1).mNROFsTF; + const int totalROFs0 = rofOverlaps.getLayer(fromLayer).mNROFsTF; + const int totalROFs1 = rofOverlaps.getLayer(toLayer).mNROFsTF; for (unsigned int pivotROF{blockIdx.x}; pivotROF < totalROFs0; pivotROF += gridDim.x) { - if (!rofMask.isROFEnabled(layerIndex, pivotROF)) { + if (!rofMask.isROFEnabled(fromLayer, pivotROF)) { continue; } - const auto& pvs = vertexLUT.getVertices(layerIndex, pivotROF); + const auto& pvs = vertexLUT.getVertices(fromLayer, pivotROF); auto primaryVertices = gpuSpan(&vertices[pvs.getFirstEntry()], pvs.getEntries()); if (primaryVertices.empty()) { continue; @@ -353,12 +358,12 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( continue; } - const auto& rofOverlap = rofOverlaps.getOverlap(layerIndex, layerIndex + 1, pivotROF); + const auto& rofOverlap = rofOverlaps.getOverlap(fromLayer, toLayer, pivotROF); if (!rofOverlap.getEntries()) { continue; } - auto clustersCurrentLayer = getClustersOnLayer(pivotROF, totalROFs0, layerIndex, ROFClusters, clusters); + auto clustersCurrentLayer = getClustersOnLayer(pivotROF, totalROFs0, fromLayer, ROFClusters, clusters); if (clustersCurrentLayer.empty()) { continue; } @@ -367,12 +372,12 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( unsigned int storedTracklets{0}; const auto& currentCluster{clustersCurrentLayer[currentClusterIndex]}; - const int currentSortedIndex{ROFClusters[layerIndex][pivotROF] + currentClusterIndex}; - if (usedClusters[layerIndex][currentCluster.clusterId]) { + const int currentSortedIndex{ROFClusters[fromLayer][pivotROF] + currentClusterIndex}; + if (usedClusters[fromLayer][currentCluster.clusterId]) { continue; } if constexpr (!initRun) { - if (trackletsLUT[layerIndex][currentSortedIndex] == trackletsLUT[layerIndex][currentSortedIndex + 1]) { + if (trackletsLUT[transitionId][currentSortedIndex] == trackletsLUT[transitionId][currentSortedIndex + 1]) { continue; } } @@ -380,7 +385,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( const float inverseR0{1.f / currentCluster.radius}; for (int iV{startVtx}; iV < endVtx; ++iV) { auto& primaryVertex{primaryVertices[iV]}; - if (!vertexLUT.isVertexCompatible(layerIndex, pivotROF, primaryVertex)) { + if (!vertexLUT.isVertexCompatible(fromLayer, pivotROF, primaryVertex)) { continue; } if (primaryVertex.isFlagSet(Vertex::Flags::UPCMode) != selectUPCVertices) { @@ -393,7 +398,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (math_utils::Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + constants::Tolerance)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInverseDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * MSAngle))}; - const int4 selectedBinsRect{o2::its::getBinsRect(currentCluster, layerIndex + 1, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut, *utils)}; + const int4 selectedBinsRect{o2::its::getBinsRect(currentCluster, toLayer, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut, *utils)}; if (selectedBinsRect.x < 0) { continue; } @@ -404,14 +409,14 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( } for (short targetROF = rofOverlap.getFirstEntry(); targetROF < rofOverlap.getEntriesBound(); ++targetROF) { - if (!rofMask.isROFEnabled(layerIndex + 1, targetROF)) { + if (!rofMask.isROFEnabled(toLayer, targetROF)) { continue; } - auto clustersNextLayer = getClustersOnLayer(targetROF, totalROFs1, layerIndex + 1, ROFClusters, clusters); + auto clustersNextLayer = getClustersOnLayer(targetROF, totalROFs1, toLayer, ROFClusters, clusters); if (clustersNextLayer.empty()) { continue; } - const auto ts = rofOverlaps.getTimeStamp(layerIndex, pivotROF, layerIndex + 1, targetROF); + const auto ts = rofOverlaps.getTimeStamp(fromLayer, pivotROF, toLayer, targetROF); if (!ts.isCompatible(primaryVertex.getTimeStamp())) { continue; } @@ -419,26 +424,26 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel( int iPhiBin = (selectedBinsRect.y + iPhiCount) % phiBins; const int firstBinIndex{utils->getBinIndex(selectedBinsRect.x, iPhiBin)}; const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1}; - const int firstRowClusterIndex = indexTables[layerIndex + 1][(targetROF)*tableSize + firstBinIndex]; - const int maxRowClusterIndex = indexTables[layerIndex + 1][(targetROF)*tableSize + maxBinIndex]; + const int firstRowClusterIndex = indexTables[toLayer][(targetROF)*tableSize + firstBinIndex]; + const int maxRowClusterIndex = indexTables[toLayer][(targetROF)*tableSize + maxBinIndex]; for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) { if (nextClusterIndex >= clustersNextLayer.size()) { break; } const Cluster& nextCluster{clustersNextLayer[nextClusterIndex]}; - if (usedClusters[layerIndex + 1][nextCluster.clusterId]) { + if (usedClusters[toLayer][nextCluster.clusterId]) { continue; } const float deltaPhi{o2::gpu::CAMath::Abs(currentCluster.phi - nextCluster.phi)}; const float deltaZ{o2::gpu::CAMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate)}; if (deltaZ / sigmaZ < NSigmaCut && (deltaPhi < phiCut || o2::gpu::CAMath::Abs(deltaPhi - o2::constants::math::TwoPI) < phiCut)) { if constexpr (initRun) { - trackletsLUT[layerIndex][currentSortedIndex]++; // we need l0 as well for usual exclusive sums. + trackletsLUT[transitionId][currentSortedIndex]++; // we need l0 as well for usual exclusive sums. } else { const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)}; const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)}; - const int nextSortedIndex{ROFClusters[layerIndex + 1][targetROF] + nextClusterIndex}; - new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, ts}; + const int nextSortedIndex{ROFClusters[toLayer][targetROF] + nextClusterIndex}; + new (tracklets[transitionId] + trackletsLUT[transitionId][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, ts}; } ++storedTracklets; } @@ -462,18 +467,20 @@ GPUg() void __launch_bounds__(256, 1) compileTrackletsLookupTableKernel( template GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( - const int layer, + const int defaultCellTopologyId, const int level, CellSeed** allCellSeeds, CurrentSeed* currentCellSeeds, const int* currentCellIds, + const int* currentCellTopologyIds, const unsigned int nCurrentCells, TrackSeed* updatedCellSeeds, int* updatedCellsIds, + int* updatedCellTopologyIds, int* foundSeedsTable, // auxiliary only in GPU code to compute the number of cells per iteration const unsigned char** usedClusters, // Used clusters - int* neighbours, - int* neighboursLUT, + CellNeighbour** neighbours, + int** neighboursLUT, const TrackingFrameInfo** foundTrackingFrameInfo, const float* layerxX0, const float bz, @@ -489,22 +496,33 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( } int foundSeeds{0}; const auto& currentCell{currentCellSeeds[iCurrentCell]}; + const int cellTopologyId = currentCellTopologyIds == nullptr ? defaultCellTopologyId : currentCellTopologyIds[iCurrentCell]; if (currentCell.getLevel() != level) { continue; } - if (currentCellIds == nullptr && (usedClusters[layer][currentCell.getFirstClusterIndex()] || - usedClusters[layer + 1][currentCell.getSecondClusterIndex()] || - usedClusters[layer + 2][currentCell.getThirdClusterIndex()])) { - continue; + if (currentCellIds == nullptr) { + bool used = false; + for (int layer = 0; layer < NLayers; ++layer) { + const int clusterIndex = currentCell.getCluster(layer); + used |= clusterIndex != constants::UnusedIndex && usedClusters[layer][clusterIndex]; + } + if (used) { + continue; + } } const int cellId = currentCellIds == nullptr ? iCurrentCell : currentCellIds[iCurrentCell]; + if (cellTopologyId < 0 || neighboursLUT[cellTopologyId] == nullptr || neighbours[cellTopologyId] == nullptr) { + continue; + } - const int startNeighbourId{cellId ? neighboursLUT[cellId - 1] : 0}; - const int endNeighbourId{neighboursLUT[cellId]}; + const int startNeighbourId{neighboursLUT[cellTopologyId][cellId]}; + const int endNeighbourId{neighboursLUT[cellTopologyId][cellId + 1]}; for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) { - const int neighbourCellId = neighbours[iNeighbourCell]; - const auto& neighbourCell = allCellSeeds[layer - 1][neighbourCellId]; + const auto& neighbourRef = neighbours[cellTopologyId][iNeighbourCell]; + const int neighbourCellTopologyId = neighbourRef.cellTopology; + const int neighbourCellId = neighbourRef.cell; + const auto& neighbourCell = allCellSeeds[neighbourCellTopologyId][neighbourCellId]; if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) { continue; @@ -515,11 +533,13 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) { continue; } - if (usedClusters[layer - 1][neighbourCell.getFirstClusterIndex()]) { + const int neighbourLayer = neighbourCell.getInnerLayer(); + const int neighbourCluster = neighbourCell.getFirstClusterIndex(); + if (usedClusters[neighbourLayer][neighbourCluster]) { continue; } TrackSeed seed{currentCell}; - auto& trHit = foundTrackingFrameInfo[layer - 1][neighbourCell.getFirstClusterIndex()]; + auto& trHit = foundTrackingFrameInfo[neighbourLayer][neighbourCluster]; if (!seed.rotate(trHit.alphaTrackingFrame)) { continue; @@ -530,7 +550,7 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( } if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - if (!seed.correctForMaterial(layerxX0[layer - 1], layerxX0[layer - 1] * constants::Radl * constants::Rho, true)) { + if (!seed.correctForMaterial(layerxX0[neighbourLayer], layerxX0[neighbourLayer] * constants::Radl * constants::Rho, true)) { continue; } } @@ -546,11 +566,15 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( if constexpr (dryRun) { foundSeedsTable[iCurrentCell]++; } else { - seed.getClusters()[layer - 1] = neighbourCell.getFirstClusterIndex(); + seed.getClusters()[neighbourLayer] = neighbourCluster; + auto mask = seed.getHitLayerMask(); + mask.set(neighbourLayer); + seed.setHitLayerMask(mask); seed.setLevel(neighbourCell.getLevel()); seed.setFirstTrackletIndex(neighbourCell.getFirstTrackletIndex()); seed.setSecondTrackletIndex(neighbourCell.getSecondTrackletIndex()); updatedCellsIds[foundSeedsTable[iCurrentCell] + foundSeeds] = neighbourCellId; + updatedCellTopologyIds[foundSeedsTable[iCurrentCell] + foundSeeds] = neighbourCellTopologyId; updatedCellSeeds[foundSeedsTable[iCurrentCell] + foundSeeds] = seed; } foundSeeds++; @@ -563,7 +587,9 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel( template void countTrackletsInROFsHandler(const IndexTableUtils* utils, const typename ROFMaskTable::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const typename ROFOverlapTable::View& rofOverlaps, const typename ROFVertexLookupTable::View& vertexLUT, const int vertexId, @@ -578,20 +604,22 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const typename TrackingTopology::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams) { - gpu::computeLayerTrackletsMultiROFKernel<<<60, 256, 0, streams[layer].get()>>>( + gpu::computeLayerTrackletsMultiROFKernel<<<60, 256, 0, streams[transitionId].get()>>>( utils, rofMask, - layer, + transitionId, + topology, rofOverlaps, vertexLUT, vertices, @@ -605,21 +633,23 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, trackletsLUTs, selectUPCVertices, NSigmaCut, - phiCuts[layer], + transitionPhiCuts[transitionId], resolutionPV, - minRs[layer + 1], - maxRs[layer + 1], - resolutions[layer], - radii[layer + 1] - radii[layer], - mulScatAng[layer]); - auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(streams[layer].get()); - thrust::exclusive_scan(nosync_policy, trackletsLUTsHost[layer], trackletsLUTsHost[layer] + nClusters[layer] + 1, trackletsLUTsHost[layer]); + minRs[toLayer], + maxRs[toLayer], + resolutions[fromLayer], + radii[toLayer] - radii[fromLayer], + transitionMSAngles[transitionId]); + auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(streams[transitionId].get()); + thrust::exclusive_scan(nosync_policy, trackletsLUTsHost[transitionId], trackletsLUTsHost[transitionId] + nClusters[fromLayer] + 1, trackletsLUTsHost[transitionId]); } template void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const typename ROFMaskTable::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const typename ROFOverlapTable::View& rofOverlaps, const typename ROFVertexLookupTable::View& vertexLUT, const int vertexId, @@ -637,20 +667,22 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const typename TrackingTopology::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams) { - gpu::computeLayerTrackletsMultiROFKernel<<<60, 256, 0, streams[layer].get()>>>( + gpu::computeLayerTrackletsMultiROFKernel<<<60, 256, 0, streams[transitionId].get()>>>( utils, rofMask, - layer, + transitionId, + topology, rofOverlaps, vertexLUT, vertices, @@ -664,25 +696,25 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, trackletsLUTs, selectUPCVertices, NSigmaCut, - phiCuts[layer], + transitionPhiCuts[transitionId], resolutionPV, - minRs[layer + 1], - maxRs[layer + 1], - resolutions[layer], - radii[layer + 1] - radii[layer], - mulScatAng[layer]); - thrust::device_ptr tracklets_ptr(spanTracklets[layer]); - auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(streams[layer].get()); - thrust::sort(nosync_policy, tracklets_ptr, tracklets_ptr + nTracklets[layer]); - auto unique_end = thrust::unique(nosync_policy, tracklets_ptr, tracklets_ptr + nTracklets[layer]); - nTracklets[layer] = unique_end - tracklets_ptr; - if (layer) { - GPUChkErrS(cudaMemsetAsync(trackletsLUTsHost[layer], 0, (nClusters[layer] + 1) * sizeof(int), streams[layer].get())); - gpu::compileTrackletsLookupTableKernel<<<60, 256, 0, streams[layer].get()>>>( - spanTracklets[layer], - trackletsLUTsHost[layer], - nTracklets[layer]); - thrust::exclusive_scan(nosync_policy, trackletsLUTsHost[layer], trackletsLUTsHost[layer] + nClusters[layer] + 1, trackletsLUTsHost[layer]); + minRs[toLayer], + maxRs[toLayer], + resolutions[fromLayer], + radii[toLayer] - radii[fromLayer], + transitionMSAngles[transitionId]); + thrust::device_ptr tracklets_ptr(spanTracklets[transitionId]); + auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(streams[transitionId].get()); + thrust::sort(nosync_policy, tracklets_ptr, tracklets_ptr + nTracklets[transitionId]); + auto unique_end = thrust::unique(nosync_policy, tracklets_ptr, tracklets_ptr + nTracklets[transitionId]); + nTracklets[transitionId] = unique_end - tracklets_ptr; + if (fromLayer > 0) { + GPUChkErrS(cudaMemsetAsync(trackletsLUTsHost[transitionId], 0, (nClusters[fromLayer] + 1) * sizeof(int), streams[transitionId].get())); + gpu::compileTrackletsLookupTableKernel<<<60, 256, 0, streams[transitionId].get()>>>( + spanTracklets[transitionId], + trackletsLUTsHost[transitionId], + nTracklets[transitionId]); + thrust::exclusive_scan(nosync_policy, trackletsLUTsHost[transitionId], trackletsLUTsHost[transitionId] + nClusters[fromLayer] + 1, trackletsLUTsHost[transitionId]); } } @@ -694,7 +726,8 @@ void countCellsHandler( Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const typename TrackingTopology::View topology, CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, @@ -707,14 +740,15 @@ void countCellsHandler( gpu::Streams& streams) { thrust::device_vector layerxX0(layerxX0Host); - gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( - sortedClusters, // const Cluster** - unsortedClusters, // const Cluster** - tfInfo, // const TrackingFrameInfo** - tracklets, // const Tracklets** - trackletsLUT, // const int** - nTracklets, // const int - layer, // const int + gpu::computeLayerCellsKernel<<<60, 256, 0, streams[cellTopologyId].get()>>>( + sortedClusters, // const Cluster** + unsortedClusters, // const Cluster** + tfInfo, // const TrackingFrameInfo** + tracklets, // const Tracklets** + trackletsLUT, // const int** + nTracklets, // const int + cellTopologyId, // const int + topology, cells, // CellSeed* cellsLUTsArrayDevice, // int** thrust::raw_pointer_cast(&layerxX0[0]), @@ -722,7 +756,7 @@ void countCellsHandler( maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float nSigmaCut); // const float - auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(streams[layer].get()); + auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(streams[cellTopologyId].get()); thrust::exclusive_scan(nosync_policy, cellsLUTsHost, cellsLUTsHost + nTracklets + 1, cellsLUTsHost); } @@ -734,7 +768,8 @@ void computeCellsHandler( Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const typename TrackingTopology::View topology, CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, @@ -746,14 +781,15 @@ void computeCellsHandler( gpu::Streams& streams) { thrust::device_vector layerxX0(layerxX0Host); - gpu::computeLayerCellsKernel<<<60, 256, 0, streams[layer].get()>>>( - sortedClusters, // const Cluster** - unsortedClusters, // const Cluster** - tfInfo, // const TrackingFrameInfo** - tracklets, // const Tracklets** - trackletsLUT, // const int** - nTracklets, // const int - layer, // const int + gpu::computeLayerCellsKernel<<<60, 256, 0, streams[cellTopologyId].get()>>>( + sortedClusters, // const Cluster** + unsortedClusters, // const Cluster** + tfInfo, // const TrackingFrameInfo** + tracklets, // const Tracklets** + trackletsLUT, // const int** + nTracklets, // const int + cellTopologyId, // const int + topology, cells, // CellSeed* cellsLUTsArrayDevice, // int** thrust::raw_pointer_cast(&layerxX0[0]), @@ -765,64 +801,60 @@ void computeCellsHandler( template void countCellNeighboursHandler(CellSeed** cellsLayersDevice, - int* neighboursLUT, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - o2::its::ExternalAllocator* alloc, gpu::Stream& stream) { gpu::computeLayerCellNeighboursKernel<<<60, 256, 0, stream.get()>>>( cellsLayersDevice, - neighboursLUT, - neighboursIndexTable, + neighboursCursor, cellsLUTs, - cellNeighbours, - tracklets, + nullptr, + sourceCellTopologyId, + targetCellTopologyId, maxChi2ClusterAttachment, bz, - layerIndex, - nCells, - maxCellNeighbours); + nCells); +} + +void scanCellNeighboursHandler(int* neighboursCursor, + int* neighboursLUT, + const unsigned int nCells, + o2::its::ExternalAllocator* alloc, + gpu::Stream& stream) +{ auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(stream.get()); - thrust::inclusive_scan(nosync_policy, neighboursLUT, neighboursLUT + nCellsNext, neighboursLUT); - thrust::exclusive_scan(nosync_policy, neighboursIndexTable, neighboursIndexTable + nCells + 1, neighboursIndexTable); + thrust::exclusive_scan(nosync_policy, neighboursCursor, neighboursCursor + nCells + 1, neighboursCursor); + GPUChkErrS(cudaMemcpyAsync(neighboursLUT, neighboursCursor, (nCells + 1) * sizeof(int), cudaMemcpyDeviceToDevice, stream.get())); } template void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, - int* neighboursLUT, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + CellNeighbour* cellNeighbours, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, gpu::Stream& stream) { gpu::computeLayerCellNeighboursKernel<<<60, 256, 0, stream.get()>>>( cellsLayersDevice, - neighboursLUT, - neighboursIndexTable, + neighboursCursor, cellsLUTs, cellNeighbours, - tracklets, + sourceCellTopologyId, + targetCellTopologyId, maxChi2ClusterAttachment, bz, - layerIndex, - nCells, - maxCellNeighbours); + nCells); } int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, @@ -842,19 +874,24 @@ int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, } template -void processNeighboursHandler(const int startLayer, - const int startLevel, +void processNeighboursHandler(const int startLevel, + const int defaultCellTopologyId, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - std::array& nCells, + const int* currentCellTopologyIds, + const int* currentCellIds, + const int* nCells, const unsigned char** usedClusters, - std::array& neighbours, - gsl::span neighboursDeviceLUTs, + CellNeighbour** neighbours, + int** neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, + const int maxHoles, + const int minTrackLength, + const LayerMask holeLayerMask, const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, @@ -865,22 +902,24 @@ void processNeighboursHandler(const int startLayer, auto allocInt = gpu::TypedAllocator(alloc); auto allocTrackSeed = gpu::TypedAllocator>(alloc); thrust::device_vector layerxX0(layerxX0Host); - thrust::device_vector> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); + thrust::device_vector> foundSeedsTable(nCells[defaultCellTopologyId] + 1, 0, allocInt); auto nosync_policy = THRUST_NAMESPACE::par_nosync(gpu::TypedAllocator(alloc)).on(gpu::Stream::DefaultStream); gpu::processNeighboursKernel<<<60, 256>>>( - startLayer, + defaultCellTopologyId, startLevel, allCellSeeds, currentCellSeeds, nullptr, - nCells[startLayer], + nullptr, + nCells[defaultCellTopologyId], + nullptr, nullptr, nullptr, thrust::raw_pointer_cast(&foundSeedsTable[0]), usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], + neighbours, + neighboursDeviceLUTs, foundTrackingFrameInfo, thrust::raw_pointer_cast(&layerxX0[0]), bz, @@ -890,20 +929,23 @@ void processNeighboursHandler(const int startLayer, thrust::exclusive_scan(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), foundSeedsTable.begin()); thrust::device_vector> updatedCellId(foundSeedsTable.back(), 0, allocInt); + thrust::device_vector> updatedCellTopologyId(foundSeedsTable.back(), 0, allocInt); thrust::device_vector, gpu::TypedAllocator>> updatedCellSeed(foundSeedsTable.back(), allocTrackSeed); gpu::processNeighboursKernel<<<60, 256>>>( - startLayer, + defaultCellTopologyId, startLevel, allCellSeeds, currentCellSeeds, nullptr, - nCells[startLayer], + nullptr, + nCells[defaultCellTopologyId], thrust::raw_pointer_cast(&updatedCellSeed[0]), thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&updatedCellTopologyId[0]), thrust::raw_pointer_cast(&foundSeedsTable[0]), usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], + neighbours, + neighboursDeviceLUTs, foundTrackingFrameInfo, thrust::raw_pointer_cast(&layerxX0[0]), bz, @@ -914,29 +956,35 @@ void processNeighboursHandler(const int startLayer, int level = startLevel; thrust::device_vector> lastCellId(allocInt); + thrust::device_vector> lastCellTopologyId(allocInt); thrust::device_vector, gpu::TypedAllocator>> lastCellSeed(allocTrackSeed); - for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { + while (level > 2 && !updatedCellSeed.empty()) { lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); + lastCellTopologyId.swap(updatedCellTopologyId); thrust::device_vector, gpu::TypedAllocator>>(allocTrackSeed).swap(updatedCellSeed); thrust::device_vector>(allocInt).swap(updatedCellId); + thrust::device_vector>(allocInt).swap(updatedCellTopologyId); auto lastCellSeedSize{lastCellSeed.size()}; foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(nosync_policy, foundSeedsTable.begin(), foundSeedsTable.end(), 0); + --level; gpu::processNeighboursKernel><<<60, 256>>>( - iLayer, - --level, + constants::UnusedIndex, + level, allCellSeeds, thrust::raw_pointer_cast(&lastCellSeed[0]), thrust::raw_pointer_cast(&lastCellId[0]), + thrust::raw_pointer_cast(&lastCellTopologyId[0]), lastCellSeedSize, nullptr, nullptr, + nullptr, thrust::raw_pointer_cast(&foundSeedsTable[0]), usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], + neighbours, + neighboursDeviceLUTs, foundTrackingFrameInfo, thrust::raw_pointer_cast(&layerxX0[0]), bz, @@ -948,22 +996,26 @@ void processNeighboursHandler(const int startLayer, auto foundSeeds{foundSeedsTable.back()}; updatedCellId.resize(foundSeeds); thrust::fill(nosync_policy, updatedCellId.begin(), updatedCellId.end(), 0); + updatedCellTopologyId.resize(foundSeeds); + thrust::fill(nosync_policy, updatedCellTopologyId.begin(), updatedCellTopologyId.end(), 0); updatedCellSeed.resize(foundSeeds); thrust::fill(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), TrackSeed()); gpu::processNeighboursKernel><<<60, 256>>>( - iLayer, + constants::UnusedIndex, level, allCellSeeds, thrust::raw_pointer_cast(&lastCellSeed[0]), thrust::raw_pointer_cast(&lastCellId[0]), + thrust::raw_pointer_cast(&lastCellTopologyId[0]), lastCellSeedSize, thrust::raw_pointer_cast(&updatedCellSeed[0]), thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&updatedCellTopologyId[0]), thrust::raw_pointer_cast(&foundSeedsTable[0]), usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], + neighbours, + neighboursDeviceLUTs, foundTrackingFrameInfo, thrust::raw_pointer_cast(&layerxX0[0]), bz, @@ -973,7 +1025,7 @@ void processNeighboursHandler(const int startLayer, } GPUChkErrS(cudaStreamSynchronize(gpu::Stream::DefaultStream)); thrust::device_vector, gpu::TypedAllocator>> outSeeds(updatedCellSeed.size(), allocTrackSeed); - auto end = thrust::copy_if(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); + auto end = thrust::copy_if(nosync_policy, updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5), maxHoles, minTrackLength, holeLayerMask)); auto s{end - outSeeds.begin()}; seedsHost.reserve(seedsHost.size() + s); thrust::copy(outSeeds.begin(), outSeeds.begin() + s, std::back_inserter(seedsHost)); @@ -1081,7 +1133,9 @@ void computeTrackSeedHandler(TrackSeed* trackSeeds, /// Explicit instantiation of ITS2 handlers template void countTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils, const ROFMaskTable<7>::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const ROFOverlapTable<7>::View& rofOverlaps, const ROFVertexLookupTable<7>::View& vertexLUT, const int vertexId, @@ -1096,19 +1150,22 @@ template void countTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const TrackingTopology<7>::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); template void computeTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils, const ROFMaskTable<7>::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const ROFOverlapTable<7>::View& rofOverlaps, const ROFVertexLookupTable<7>::View& vertexLUT, const int vertexId, @@ -1126,13 +1183,14 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const TrackingTopology<7>::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); @@ -1142,7 +1200,8 @@ template void countCellsHandler<7>(const Cluster** sortedClusters, Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const TrackingTopology<7>::View topology, CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, @@ -1160,7 +1219,8 @@ template void computeCellsHandler<7>(const Cluster** sortedClusters, Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const TrackingTopology<7>::View topology, CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, @@ -1172,47 +1232,44 @@ template void computeCellsHandler<7>(const Cluster** sortedClusters, gpu::Streams& streams); template void countCellNeighboursHandler<7>(CellSeed** cellsLayersDevice, - int* neighboursLUT, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - o2::its::ExternalAllocator* alloc, gpu::Stream& stream); template void computeCellNeighboursHandler<7>(CellSeed** cellsLayersDevice, - int* neighboursLUT, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + CellNeighbour* cellNeighbours, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, gpu::Stream& stream); -template void processNeighboursHandler<7>(const int startLayer, - const int startLevel, +template void processNeighboursHandler<7>(const int startLevel, + const int defaultCellTopologyId, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - std::array& nCells, + const int* currentCellTopologyIds, + const int* currentCellIds, + const int* nCells, const unsigned char** usedClusters, - std::array& neighbours, - gsl::span neighboursDeviceLUTs, + CellNeighbour** neighbours, + int** neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, + const int maxHoles, + const int minTrackLength, + const LayerMask holeLayerMask, const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, @@ -1262,7 +1319,9 @@ template void computeTrackSeedHandler(TrackSeed<7>* trackSeeds, #ifdef ENABLE_UPGRADES template void countTrackletsInROFsHandler<11>(const IndexTableUtils<11>* utils, const ROFMaskTable<11>::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const ROFOverlapTable<11>::View& rofOverlaps, const ROFVertexLookupTable<11>::View& vertexLUT, const int vertexId, @@ -1277,19 +1336,22 @@ template void countTrackletsInROFsHandler<11>(const IndexTableUtils<11>* utils, gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const TrackingTopology<11>::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); template void computeTrackletsInROFsHandler<11>(const IndexTableUtils<11>* utils, const ROFMaskTable<11>::View& rofMask, - const int layer, + const int transitionId, + const int fromLayer, + const int toLayer, const ROFOverlapTable<11>::View& rofOverlaps, const ROFVertexLookupTable<11>::View& vertexLUT, const int vertexId, @@ -1307,13 +1369,14 @@ template void computeTrackletsInROFsHandler<11>(const IndexTableUtils<11>* utils gsl::span trackletsLUTsHost, const bool selectUPCVertices, const float NSigmaCut, - bounded_vector& phiCuts, + const TrackingTopology<11>::View topology, + bounded_vector& transitionPhiCuts, const float resolutionPV, std::array& minRs, std::array& maxRs, bounded_vector& resolutions, std::vector& radii, - bounded_vector& mulScatAng, + bounded_vector& transitionMSAngles, o2::its::ExternalAllocator* alloc, gpu::Streams& streams); @@ -1323,7 +1386,8 @@ template void countCellsHandler<11>(const Cluster** sortedClusters, Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const TrackingTopology<11>::View topology, CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, @@ -1341,7 +1405,8 @@ template void computeCellsHandler<11>(const Cluster** sortedClusters, Tracklet** tracklets, int** trackletsLUT, const int nTracklets, - const int layer, + const int cellTopologyId, + const TrackingTopology<11>::View topology, CellSeed* cells, int** cellsLUTsArrayDevice, int* cellsLUTsHost, @@ -1353,47 +1418,44 @@ template void computeCellsHandler<11>(const Cluster** sortedClusters, gpu::Streams& streams); template void countCellNeighboursHandler<11>(CellSeed** cellsLayersDevice, - int* neighboursLUT, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - o2::its::ExternalAllocator* alloc, gpu::Stream& stream); template void computeCellNeighboursHandler<11>(CellSeed** cellsLayersDevice, - int* neighboursLUT, + int* neighboursCursor, int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const Tracklet** tracklets, + CellNeighbour* cellNeighbours, + const int sourceCellTopologyId, + const int targetCellTopologyId, const float maxChi2ClusterAttachment, const float bz, - const int layerIndex, const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, gpu::Stream& stream); -template void processNeighboursHandler<11>(const int startLayer, - const int startLevel, +template void processNeighboursHandler<11>(const int startLevel, + const int defaultCellTopologyId, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - std::array& nCells, + const int* currentCellTopologyIds, + const int* currentCellIds, + const int* nCells, const unsigned char** usedClusters, - std::array& neighbours, - gsl::span neighboursDeviceLUTs, + CellNeighbour** neighbours, + int** neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, bounded_vector>& seedsHost, const float bz, const float maxChi2ClusterAttachment, const float maxChi2NDF, + const int maxHoles, + const int minTrackLength, + const LayerMask holeLayerMask, const std::vector& layerxX0Host, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h index c7718ee666311..4706977d08ba6 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h @@ -16,7 +16,10 @@ #ifndef TRACKINGITSU_INCLUDE_CACELL_H_ #define TRACKINGITSU_INCLUDE_CACELL_H_ +#include + #include "ITStracking/Constants.h" +#include "ITStracking/LayerMask.h" #include "DataFormatsITS/TimeEstBC.h" #include "ReconstructionDataFormats/Track.h" #include "GPUCommonDef.h" @@ -24,11 +27,21 @@ namespace o2::its { +struct CellNeighbour { + int cellTopology{-1}; + int cell{-1}; + int nextCellTopology{-1}; + int nextCell{-1}; + int level{-1}; +}; + template class SeedBase : public o2::track::TrackParCovF { public: - GPUhd() int getInnerLayer() const { return getUserField(); } + GPUhd() LayerMask getHitLayerMask() const { return LayerMask{static_cast(getUserField())}; } + GPUhd() void setHitLayerMask(LayerMask mask) { setUserField(mask.value()); } + GPUhd() int getInnerLayer() const { return getHitLayerMask().first(); } GPUhd() int getFirstTrackletIndex() const { return mTracklets[0]; }; GPUhd() void setFirstTrackletIndex(int trkl) { mTracklets[0] = trkl; }; GPUhd() int getSecondTrackletIndex() const { return mTracklets[1]; }; @@ -71,9 +84,13 @@ class CellSeed final : public SeedBase public: GPUhdDefault() CellSeed() = default; GPUhd() CellSeed(int innerL, int cl0, int cl1, int cl2, int trkl0, int trkl1, const o2::track::TrackParCovF& tpc, float chi2, const TimeEstBC& time) + : CellSeed(LayerMask(innerL, innerL + 1, innerL + 2), cl0, cl1, cl2, trkl0, trkl1, tpc, chi2, time) + { + } + GPUhd() CellSeed(LayerMask hitLayerMask, int cl0, int cl1, int cl2, int trkl0, int trkl1, const o2::track::TrackParCovF& tpc, float chi2, const TimeEstBC& time) : Base(tpc, chi2, 1, time) { - setUserField(innerL); + setHitLayerMask(hitLayerMask); auto& clusters = this->clustersRaw(); clusters[0] = cl0; clusters[1] = cl1; @@ -92,12 +109,12 @@ class CellSeed final : public SeedBase GPUhd() int getThirdClusterIndex() const { return this->clustersRaw()[2]; }; GPUhd() auto& getClusters() { return this->clustersRaw(); } GPUhd() const auto& getClusters() const { return this->clustersRaw(); } - /// getCluster takes an ABSOLUTE layer index and returns UnusedIndex if the - /// layer is outside the 3 stored slots (innerL, innerL+1, innerL+2). + /// getCluster takes an ABSOLUTE layer index. Compact cluster slots are + /// mapped to absolute layers by set-bit order in the hit-layer mask. GPUhd() int getCluster(int layer) const { - const int rel = layer - getInnerLayer(); - return (rel >= 0 && rel < constants::ClustersPerCell) ? this->clustersRaw()[rel] : constants::UnusedIndex; + const int slot = getHitLayerMask().slot(layer); + return (slot >= 0 && slot < constants::ClustersPerCell) ? this->clustersRaw()[slot] : constants::UnusedIndex; } }; @@ -114,14 +131,17 @@ class TrackSeed final : public SeedBase GPUhd() TrackSeed(const CellSeed& cs) : Base(static_cast(cs), cs.getChi2(), cs.getLevel(), cs.getTimeStamp()) { - this->setUserField(cs.getInnerLayer()); + this->setHitLayerMask(cs.getHitLayerMask()); this->setFirstTrackletIndex(cs.getFirstTrackletIndex()); this->setSecondTrackletIndex(cs.getSecondTrackletIndex()); - const int innerL = cs.getInnerLayer(); auto& clusters = this->clustersRaw(); - clusters[innerL + 0] = cs.getFirstClusterIndex(); - clusters[innerL + 1] = cs.getSecondClusterIndex(); - clusters[innerL + 2] = cs.getThirdClusterIndex(); + int slot = 0; + const auto hitMask = cs.getHitLayerMask(); + for (int layer = 0; layer < NLayers; ++layer) { + if (hitMask.has(layer)) { + clusters[layer] = cs.getClusters()[slot++]; + } + } } GPUhdDefault() TrackSeed(const TrackSeed&) = default; GPUhdDefault() ~TrackSeed() = default; @@ -129,14 +149,27 @@ class TrackSeed final : public SeedBase GPUhdDefault() TrackSeed& operator=(const TrackSeed&) = default; GPUhdDefault() TrackSeed& operator=(TrackSeed&&) = default; - /// Three-cluster view of the original cell — note: innerL (UserField) is not - /// updated when processNeighbours extends the cluster list leftward. - GPUhd() int getFirstClusterIndex() const { return this->clustersRaw()[this->getUserField()]; } - GPUhd() int getSecondClusterIndex() const { return this->clustersRaw()[this->getUserField() + 1]; } - GPUhd() int getThirdClusterIndex() const { return this->clustersRaw()[this->getUserField() + 2]; } + GPUhd() int getFirstClusterIndex() const { return getClusterBySlot(0); } + GPUhd() int getSecondClusterIndex() const { return getClusterBySlot(1); } + GPUhd() int getThirdClusterIndex() const { return getClusterBySlot(2); } GPUhd() auto& getClusters() { return this->clustersRaw(); } GPUhd() const auto& getClusters() const { return this->clustersRaw(); } GPUhd() int getCluster(int layer) const { return this->clustersRaw()[layer]; } + + private: + GPUhd() int getClusterBySlot(int requestedSlot) const + { + int slot = 0; + const auto hitMask = this->getHitLayerMask(); + for (int layer = 0; layer < NLayers; ++layer) { + if (hitMask.has(layer)) { + if (slot++ == requestedSlot) { + return this->clustersRaw()[layer]; + } + } + } + return constants::UnusedIndex; + } }; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h index ce7b3e5a87630..c939f39532fdb 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h @@ -43,7 +43,12 @@ enum class IterationStep : uint8_t { using IterationSteps = o2::utils::EnumFlags; struct TrackingParameters { - int CellMinimumLevel() const noexcept { return MinTrackLength - constants::ClustersPerCell + 1; } + int CellMinimumLevel() const noexcept + { + const int minClusters = MinTrackLength - (MaxHoles > 0 ? MaxHoles : 0); + const int effectiveMinClusters = minClusters > constants::ClustersPerCell ? minClusters : constants::ClustersPerCell; + return effectiveMinClusters - constants::ClustersPerCell + 1; + } int NeighboursPerRoad() const noexcept { return NLayers - 3; } int CellsPerRoad() const noexcept { return NLayers - 2; } int TrackletsPerRoad() const noexcept { return NLayers - 1; } @@ -68,6 +73,8 @@ struct TrackingParameters { bool AllowSharingFirstCluster = false; int ClusterSharing = 0; int MinTrackLength = 7; + int MaxHoles = 0; + uint16_t HoleLayerMask = 0; float NSigmaCut = 5; float PVres = 1.e-2f; /// Trackleting cuts diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/LayerMask.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/LayerMask.h new file mode 100644 index 0000000000000..9fe9894b3b457 --- /dev/null +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/LayerMask.h @@ -0,0 +1,115 @@ +// Copyright 2019-2026 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef TRACKINGITSU_INCLUDE_LAYERMASK_H_ +#define TRACKINGITSU_INCLUDE_LAYERMASK_H_ + +#include +#include + +#ifndef GPUCA_GPUCODE +#include +#include +#endif + +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" +#include "ITStracking/Constants.h" + +namespace o2::its +{ + +struct LayerMask { + GPUhdDefault() constexpr LayerMask() noexcept = default; + GPUhdDefault() constexpr LayerMask(uint16_t mask) noexcept : mBits{mask} {} + GPUhdDefault() constexpr LayerMask(int layer0, int layer1, int layer2) noexcept + : mBits{static_cast((uint16_t(1) << layer0) | (uint16_t(1) << layer1) | (uint16_t(1) << layer2))} + { + } + GPUhdi() constexpr operator uint16_t() const noexcept { return mBits; } + GPUhdi() constexpr uint16_t value() const noexcept { return mBits; } + GPUhdi() constexpr void set(int layer) noexcept { mBits |= (uint16_t(1) << layer); } + + GPUhdi() LayerMask operator~() const noexcept { return LayerMask{static_cast(~mBits)}; } + GPUhdi() LayerMask operator&(LayerMask other) const noexcept { return LayerMask{static_cast(mBits & other.mBits)}; } + GPUhdi() LayerMask operator|(LayerMask other) const noexcept { return LayerMask{static_cast(mBits | other.mBits)}; } + GPUhdi() LayerMask& operator&=(LayerMask other) noexcept + { + mBits &= other.mBits; + return *this; + } + GPUhdi() LayerMask& operator|=(LayerMask other) noexcept + { + mBits |= other.mBits; + return *this; + } + + GPUhdi() bool empty() const noexcept { return mBits == 0; } + GPUhdi() bool has(int layer) const noexcept { return mBits & (uint16_t(1) << layer); } + GPUhdi() bool isSubsetOf(LayerMask allowed) const noexcept { return (*this & ~allowed).empty(); } + GPUhdi() bool isAllowedHoleMask(int maxHoles, LayerMask allowedHoleMask) const noexcept + { + const int allowedHoles = maxHoles > 0 ? maxHoles : 0; + return count() <= allowedHoles && isSubsetOf(allowedHoleMask); + } + GPUhdi() bool isAllowed(int maxHoles, LayerMask allowedHoleMask) const noexcept + { + return holeMask().isAllowedHoleMask(maxHoles, allowedHoleMask); + } + GPUhdi() int length() const noexcept { return empty() ? 0 : last() - first() + 1; } + GPUhdi() int count() const noexcept { return static_cast(o2::gpu::GPUCommonMath::Popcount(mBits)); } + GPUhdi() int first() const noexcept { return mBits ? static_cast(o2::gpu::GPUCommonMath::Ctz(mBits)) : constants::UnusedIndex; } + GPUhdi() int last() const noexcept { return mBits ? 31 - static_cast(o2::gpu::GPUCommonMath::Clz(mBits)) : constants::UnusedIndex; } + GPUhdi() LayerMask holeMask() const noexcept + { + return empty() ? LayerMask{0} : (span(first(), last()) & ~(*this)); + } + + GPUhdi() int slot(int layer) const noexcept + { + if (!has(layer)) { + return constants::UnusedIndex; + } + const uint32_t lowerLayers = (uint32_t(1) << layer) - 1; + return static_cast(o2::gpu::GPUCommonMath::Popcount(static_cast(mBits) & lowerLayers)); + } + + static GPUhdi() LayerMask span(int fromLayer, int toLayer) noexcept + { + if (fromLayer > toLayer) { + return 0; + } + const uint32_t upper = (uint32_t(1) << (toLayer + 1)) - 1; + const uint32_t lower = (uint32_t(1) << fromLayer) - 1; + return static_cast(upper & ~lower); + } + + static GPUhdi() LayerMask skipped(int fromLayer, int toLayer) noexcept + { + return (toLayer - fromLayer <= 1) ? LayerMask{0} : span(fromLayer + 1, toLayer - 1); + } + +#ifndef GPUCA_GPUCODE + std::string asString() const { return fmt::format("{:016b}", mBits); } +#endif + + private: + uint16_t mBits{0}; +}; + +static_assert(std::is_standard_layout_v); +static_assert(std::is_trivially_copyable_v); +static_assert(sizeof(LayerMask) == sizeof(uint16_t)); +static_assert(alignof(LayerMask) == alignof(uint16_t)); + +} // namespace o2::its + +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/MathUtils.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/MathUtils.h index d276e27638dbd..950d8c0a9117f 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/MathUtils.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/MathUtils.h @@ -16,6 +16,8 @@ #ifndef O2_ITS_TRACKING_MATHUTILS_H_ #define O2_ITS_TRACKING_MATHUTILS_H_ +#include + #include "CommonConstants/MathConstants.h" #include "ITStracking/Constants.h" #include "MathUtils/Utils.h" diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index b78540bddfabf..3fef2dc640cbc 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -32,6 +32,7 @@ #include "ITStracking/ExternalAllocator.h" #include "ITStracking/BoundedAllocator.h" #include "ITStracking/ROFLookupTables.h" +#include "ITStracking/TrackingTopology.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" @@ -66,6 +67,7 @@ struct TimeFrame { using ROFOverlapTableN = ROFOverlapTable; using ROFVertexLookupTableN = ROFVertexLookupTable; using ROFMaskTableN = ROFMaskTable; + using TrackingTopologyN = TrackingTopology; using TrackSeedN = TrackSeed; friend class gpu::TimeFrameGPU; @@ -112,10 +114,10 @@ struct TimeFrame { auto& getMaxRs() { return mMaxR; } float getMinR(int layer) const { return mMinR[layer]; } float getMaxR(int layer) const { return mMaxR[layer]; } - float getMSangle(int layer) const { return mMSangles[layer]; } - auto& getMSangles() { return mMSangles; } - float getPhiCut(int layer) const { return mPhiCuts[layer]; } - auto& getPhiCuts() { return mPhiCuts; } + float getTransitionPhiCut(int transitionId) const { return mTransitionPhiCuts[transitionId]; } + float getTransitionMSAngle(int transitionId) const { return mTransitionMSAngles[transitionId]; } + auto& getTransitionPhiCuts() { return mTransitionPhiCuts; } + auto& getTransitionMSAngles() { return mTransitionMSAngles; } float getPositionResolution(int layer) const { return mPositionResolution[layer]; } auto& getPositionResolutions() { return mPositionResolution; } @@ -135,6 +137,8 @@ struct TimeFrame { const auto& getIndexTableUtils() const { return mIndexTableUtils; } const auto& getROFOverlapTable() const { return mROFOverlapTable; } const auto& getROFOverlapTableView() const { return mROFOverlapTableView; } + const auto& getTrackerTopologies() const { return mTrackerTopologies; } + const auto& getTrackingTopologyView() const { return mTrackingTopologyView; } void setROFOverlapTable(ROFOverlapTableN table) { mROFOverlapTable = std::move(table); @@ -177,7 +181,10 @@ struct TimeFrame { auto& getCellsLabel(int layer) { return mCellLabels[layer]; } bool hasMCinformation() const { return mClusterLabels[0] != nullptr; } - void initialise(const TrackingParameters& trkParam, const int maxLayers = NLayers); + void initVertexingTopology(const TrackingParameters& trkParam); + void initDefaultTrackingTopology(const TrackingParameters& trkParam, const int maxLayers = NLayers); + void initTrackerTopologies(gsl::span trkParams, const int maxLayers = NLayers); + void initialise(const TrackingParameters& trkParam, const int maxLayers = NLayers, const int iteration = constants::UnusedIndex); bool isClusterUsed(int layer, int clusterId) const { return mUsedClusters[layer][clusterId]; } void markUsedCluster(int layer, int clusterId) { mUsedClusters[layer][clusterId] = true; } @@ -193,6 +200,7 @@ struct TimeFrame { auto& getCellsLookupTable() { return mCellsLookupTable; } auto& getCellsNeighbours() { return mCellsNeighbours; } + auto& getCellsNeighboursTopology() { return mCellsNeighboursTopology; } auto& getCellsNeighboursLUT() { return mCellsNeighboursLUT; } auto& getTracks() { return mTracks; } auto& getTracksLabel() { return mTracksLabel; } @@ -273,6 +281,7 @@ struct TimeFrame { bounded_vector mTracks; bounded_vector mTracksLabel; std::vector> mCellsNeighbours; + std::vector> mCellsNeighboursTopology; std::vector> mCellsLookupTable; const o2::base::PropagatorImpl* mPropagatorDevice = nullptr; // Needed only for GPU @@ -292,8 +301,8 @@ struct TimeFrame { bool isBeamPositionOverridden = false; std::array mMinR; std::array mMaxR; - bounded_vector mMSangles; - bounded_vector mPhiCuts; + bounded_vector mTransitionPhiCuts; + bounded_vector mTransitionMSAngles; bounded_vector mPositionResolution; std::array, NLayers> mClusterSize; @@ -319,6 +328,10 @@ struct TimeFrame { IndexTableUtilsN mIndexTableUtils; ROFOverlapTableN mROFOverlapTable; ROFOverlapTableN::View mROFOverlapTableView; + TrackingTopologyN mVertexingTopology; + TrackingTopologyN mDefaultTrackingTopology; + std::vector mTrackerTopologies; + typename TrackingTopologyN::View mTrackingTopologyView; ROFVertexLookupTableN mROFVertexLookupTable; ROFVertexLookupTableN::View mROFVertexLookupTableView; ROFMaskTableN mMultiplicityCutMask; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h index 885cb0f2b9ca5..d244b39ff9d11 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackHelpers.h @@ -16,8 +16,6 @@ #ifndef O2_ITS_TRACKING_TRACKHELPERS_H_ #define O2_ITS_TRACKING_TRACKHELPERS_H_ -#include - #include "DataFormatsITS/TrackITS.h" #include "ITStracking/Cell.h" #include "ITStracking/Cluster.h" @@ -29,6 +27,16 @@ namespace o2::its::track { +// Prefer 1) longer track 2) sorted in chi2 +GPUhdi() bool isBetter(const o2::its::TrackITS& a, const o2::its::TrackITS& b) +{ + const auto ncla = a.getNumberOfClusters(); + const auto nclb = b.getNumberOfClusters(); + // is a as long as b ? then decide on chi2 + // otherwise prefer longer + return (ncla == nclb) ? (a.getChi2() < b.getChi2()) : ncla > nclb; +} + // Find the populated interior layer closest to the radial midpoint. // If no layer can be found, return constants::UnusedIndex. // Should minimize the sagitta bias. diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index aa4592c63f404..647403bb6b548 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -45,7 +45,7 @@ class TrackerTraits virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf) { mTimeFrame = tf; } - virtual void initialiseTimeFrame(const int iteration) { mTimeFrame->initialise(mTrkParams[iteration], mTrkParams[iteration].NLayers); } + virtual void initialiseTimeFrame(const int iteration) { mTimeFrame->initialise(mTrkParams[iteration], mTrkParams[iteration].NLayers, iteration); } virtual void computeLayerTracklets(const int iteration, int iVertex); virtual void computeLayerCells(const int iteration); @@ -53,7 +53,7 @@ class TrackerTraits virtual void findRoads(const int iteration); template - void processNeighbours(int iteration, int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId); + void processNeighbours(int iteration, int defaultCellTopologyId, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, const bounded_vector& currentCellTopologyId, bounded_vector& updatedCellSeed, bounded_vector& updatedCellId, bounded_vector& updatedCellTopologyId); void acceptTracks(int iteration, bounded_vector& tracks, bounded_vector>& firstClusters, bounded_vector>& sharedFirstClusters); void markTracks(int iteration, bounded_vector>& sharedFirstClusters); diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index 1d997ef12147a..21b4f928d5b73 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -71,6 +71,8 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper0, otherwise use code defaults uint8_t startLayerMask[constants::MaxIter] = {}; // mask of start layer for this iteration (if >0) + int maxHolesIter[constants::MaxIter] = {}; // maximum number of missing internal layers allowed in the CA topology for each iteration + uint16_t holeLayerMaskIter[constants::MaxIter] = {}; // layers that may be skipped by the CA topology for each iteration float minPtIterLgt[constants::MaxIter * (MaxTrackLength - MinTrackLength + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults float sysErrY2[7] = {0}; // systematic error^2 in Y per layer float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingTopology.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingTopology.h new file mode 100644 index 0000000000000..2afb67609664f --- /dev/null +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingTopology.h @@ -0,0 +1,219 @@ +// Copyright 2019-2026 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef TRACKINGITSU_INCLUDE_TRACKINGTOPOLOGY_H_ +#define TRACKINGITSU_INCLUDE_TRACKINGTOPOLOGY_H_ + +#include +#include +#include +#include + +#ifndef GPUCA_GPUCODE +#include +#include +#include "Framework/Logger.h" +#endif + +#include "CommonDataFormat/RangeReference.h" +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" +#include "ITStracking/LayerMask.h" + +namespace o2::its +{ + +template +class TrackingTopology +{ + public: + using Id = uint8_t; + using Mask = LayerMask; + using Range = o2::dataformats::RangeReference; + static constexpr int MaxTransitions = (NLayers * (NLayers - 1)) / 2; + static constexpr int MaxCells = (NLayers * (NLayers - 1) * (NLayers - 2)) / 6; + static_assert(NLayers < std::numeric_limits::max()); + static_assert(MaxTransitions <= std::numeric_limits::max()); + static_assert(MaxCells <= std::numeric_limits::max()); + + // Describes from which layer to which layer the look-up happens + struct LayerTransition { + Id fromLayer{0}; + Id toLayer{0}; + }; + static_assert(std::is_standard_layout_v); + static_assert(std::is_trivially_copyable_v); + static_assert(sizeof(LayerTransition) == (2 * sizeof(Id))); + + // Describes from which LayerTransition a tracklet is allowed to originate + // and with which LayerTransition this can be combined additionally the hitMasked is cached + struct CellTopology { + Id firstTransition{0}; + Id secondTransition{0}; + Mask hitLayerMask{0}; + }; + static_assert(std::is_standard_layout_v); + static_assert(std::is_trivially_copyable_v); + static_assert(sizeof(CellTopology) == (2 * sizeof(Id)) + sizeof(Mask)); + + // GPU ready view of the underlying LUTs + struct View { + const LayerTransition* transitions{nullptr}; + const CellTopology* cells{nullptr}; + const Range* cellsByFirstTransitionIndex{nullptr}; + const Id* cellsByFirstTransition{nullptr}; + Id nTransitions{0}; + Id nCells{0}; + Id nCellsByFirstTransition{0}; + + GPUhdi() const LayerTransition& getTransition(Id id) const { return transitions[id]; } + GPUhdi() const CellTopology& getCell(Id id) const { return cells[id]; } + GPUhdi() Range getCellsStartingWithTransition(Id transitionId) const { return cellsByFirstTransitionIndex[transitionId]; } + +#ifndef GPUCA_GPUCODE + std::string asString() const + { + std::string out = fmt::format("TrackingTopology: transitions={} cells={}", nTransitions, nCells); + out += "\n transitions:"; + for (Id transitionId = 0; transitionId < nTransitions; ++transitionId) { + const auto& t = transitions[transitionId]; + out += fmt::format("\n {}: {} -> {}", transitionId, t.fromLayer, t.toLayer); + } + out += "\n cells:"; + for (Id cellId = 0; cellId < nCells; ++cellId) { + const auto& c = cells[cellId]; + const auto& first = transitions[c.firstTransition]; + const auto& second = transitions[c.secondTransition]; + out += fmt::format("\n {}: {} -> {} -> {} hitMask={} transitions=({}, {})", cellId, first.fromLayer, first.toLayer, second.toLayer, c.hitLayerMask.asString(), c.firstTransition, c.secondTransition); + } + return out; + } + + void print() const + { + LOGP(info, "{}", asString()); + } +#endif + }; + + void init(int maxLayers, int maxHoles, Mask holeLayerMask) + { + clear(); + mMaxLayers = o2::gpu::CAMath::Max(0, o2::gpu::CAMath::Min(maxLayers, NLayers)); + mMaxHoles = o2::gpu::CAMath::Max(maxHoles, 0); + mHoleLayerMask = holeLayerMask; + for (int fromLayer = 0; fromLayer < mMaxLayers; ++fromLayer) { + for (int toLayer = fromLayer + 1; toLayer < mMaxLayers; ++toLayer) { + if (Mask::skipped(fromLayer, toLayer).isAllowedHoleMask(mMaxHoles, mHoleLayerMask)) { + mTransitions[mNTransitions++] = LayerTransition{static_cast(fromLayer), static_cast(toLayer)}; + } + } + } + + for (Id firstId = 0; firstId < mNTransitions; ++firstId) { + const auto& first = mTransitions[firstId]; + for (Id secondId = 0; secondId < mNTransitions; ++secondId) { + const auto& second = mTransitions[secondId]; + if (first.toLayer != second.fromLayer) { + continue; + } + const Mask hitMask{first.fromLayer, first.toLayer, second.toLayer}; + if (hitMask.isAllowed(mMaxHoles, mHoleLayerMask)) { + mCells[mNCells++] = CellTopology{firstId, secondId, hitMask}; + } + } + } + + fillCellsByTransition(); + } + + View getView() const + { + return View{mTransitions.data(), + mCells.data(), + mCellsByFirstTransitionIndex.data(), + mCellsByFirstTransition.data(), + mNTransitions, + mNCells, + mNCellsByFirstTransition}; + } + + View getDeviceView(const LayerTransition* deviceTransitions, + const CellTopology* deviceCells, + const Range* deviceCellsByFirstTransitionIndex, + const Id* deviceCellsByFirstTransition) const + { + return View{deviceTransitions, + deviceCells, + deviceCellsByFirstTransitionIndex, + deviceCellsByFirstTransition, + mNTransitions, + mNCells, + mNCellsByFirstTransition}; + } + + const auto& getTransitions() const noexcept { return mTransitions; } + const auto& getCells() const noexcept { return mCells; } + const auto& getCellsByFirstTransitionIndex() const noexcept { return mCellsByFirstTransitionIndex; } + const auto& getCellsByFirstTransition() const noexcept { return mCellsByFirstTransition; } + Id getNTransitions() const noexcept { return mNTransitions; } + Id getNCells() const noexcept { return mNCells; } + Id getNCellsByFirstTransition() const noexcept { return mNCellsByFirstTransition; } + + private: + void clear() + { + mNTransitions = 0; + mNCells = 0; + mNCellsByFirstTransition = 0; + mTransitions.fill({}); + mCells.fill({}); + mCellsByFirstTransitionIndex.fill(Range{0, 0}); + mCellsByFirstTransition.fill(0); + } + + void fillCellsByTransition() + { + std::array counts{}; + for (Id cellId = 0; cellId < mNCells; ++cellId) { + ++counts[mCells[cellId].firstTransition]; + } + + Id offset = 0; + for (Id transitionId = 0; transitionId < mNTransitions; ++transitionId) { + mCellsByFirstTransitionIndex[transitionId].setFirstEntry(offset); + mCellsByFirstTransitionIndex[transitionId].setEntries(counts[transitionId]); + offset += counts[transitionId]; + } + + std::array cursor{}; + for (Id cellId = 0; cellId < mNCells; ++cellId) { + const Id transitionId = mCells[cellId].firstTransition; + mCellsByFirstTransition[mCellsByFirstTransitionIndex[transitionId].getFirstEntry() + cursor[transitionId]++] = cellId; + } + mNCellsByFirstTransition = offset; + } + + int mMaxLayers{0}; + int mMaxHoles{0}; + Mask mHoleLayerMask{0}; + Id mNTransitions{0}; + Id mNCells{0}; + Id mNCellsByFirstTransition{0}; + std::array mTransitions{}; + std::array mCells{}; + std::array mCellsByFirstTransitionIndex{}; + std::array mCellsByFirstTransition{}; +}; + +} // namespace o2::its + +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx b/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx index c425d467a8061..0087da0a85ac2 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx @@ -24,8 +24,8 @@ using namespace o2::its; std::string TrackingParameters::asString() const { - std::string str = std::format("NZb:{} NPhB:{} PerVtx:{} DropFail:{} ClSh:{} TtklMinPt:{:.2f} MinCl:{}", - ZBins, PhiBins, PerPrimaryVertexProcessing, DropTFUponFailure, ClusterSharing, TrackletMinPt, MinTrackLength); + std::string str = std::format("NZb:{} NPhB:{} PerVtx:{} DropFail:{} ClSh:{} TtklMinPt:{:.2f} MinCl:{} MaxHoles:{} HoleMask:{:#x}", + ZBins, PhiBins, PerPrimaryVertexProcessing, DropTFUponFailure, ClusterSharing, TrackletMinPt, MinTrackLength, MaxHoles, HoleLayerMask); bool first = true; for (int il = NLayers; il >= MinTrackLength; il--) { int slot = NLayers - il; @@ -204,6 +204,11 @@ std::vector TrackingMode::getTrackingParameters(TrackingMode p.SaveTimeBenchmarks = tc.saveTimeBenchmarks; p.FataliseUponFailure = tc.fataliseUponFailure; p.AllowSharingFirstCluster = tc.allowSharingFirstCluster; + const auto iter = &p - trackParams.data(); + if (iter < constants::MaxIter) { + p.MaxHoles = tc.maxHolesIter[iter]; + p.HoleLayerMask = tc.holeLayerMaskIter[iter]; + } if (tc.useMatCorrTGeo) { p.CorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrTGeo; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index fc99bf0f35403..8375004cbfbad 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -241,8 +241,32 @@ void TimeFrame::prepareClusters(const TrackingParameters& trkParam, con } template -void TimeFrame::initialise(const TrackingParameters& trkParam, const int maxLayers) +void TimeFrame::initVertexingTopology(const TrackingParameters& trkParam) { + mVertexingTopology.init(3, trkParam.MaxHoles, trkParam.HoleLayerMask); +} + +template +void TimeFrame::initDefaultTrackingTopology(const TrackingParameters& trkParam, const int maxLayers) +{ + mDefaultTrackingTopology.init(maxLayers, trkParam.MaxHoles, trkParam.HoleLayerMask); +} + +template +void TimeFrame::initTrackerTopologies(gsl::span trkParams, const int maxLayers) +{ + mTrackerTopologies.resize(trkParams.size()); + for (size_t iteration = 0; iteration < trkParams.size(); ++iteration) { + const int iterationMaxLayers = std::min(maxLayers, trkParams[iteration].NLayers); + mTrackerTopologies[iteration].init(iterationMaxLayers, trkParams[iteration].MaxHoles, trkParams[iteration].HoleLayerMask); + } +} + +template +void TimeFrame::initialise(const TrackingParameters& trkParam, const int maxLayers, const int iteration) +{ + mTrackingTopologyView = iteration != constants::UnusedIndex ? mTrackerTopologies[iteration].getView() : (maxLayers == 3 ? mVertexingTopology.getView() : mDefaultTrackingTopology.getView()); + if (trkParam.PassFlags[IterationStep::FirstPass]) { deepVectorClear(mTracks); deepVectorClear(mTracksLabel); @@ -253,14 +277,6 @@ void TimeFrame::initialise(const TrackingParameters& trkParam, const in deepVectorClear(mPrimaryVerticesLabels); } clearResizeBoundedVector(mLinesLabels, getNrof(1), mMemoryPool.get()); - clearResizeBoundedVector(mCells, trkParam.CellsPerRoad(), mMemoryPool.get()); - clearResizeBoundedVector(mCellsLookupTable, trkParam.CellsPerRoad() - 1, mMemoryPool.get()); - clearResizeBoundedVector(mCellsNeighbours, trkParam.CellsPerRoad() - 1, mMemoryPool.get()); - clearResizeBoundedVector(mCellsNeighboursLUT, trkParam.CellsPerRoad() - 1, mMemoryPool.get()); - clearResizeBoundedVector(mCellLabels, trkParam.CellsPerRoad(), mMemoryPool.get()); - clearResizeBoundedVector(mTracklets, std::min(trkParam.TrackletsPerRoad(), maxLayers - 1), mMemoryPool.get()); - clearResizeBoundedVector(mTrackletLabels, trkParam.TrackletsPerRoad(), mMemoryPool.get()); - clearResizeBoundedVector(mTrackletsLookupTable, trkParam.TrackletsPerRoad(), mMemoryPool.get()); mIndexTableUtils.setTrackingParameters(trkParam); clearResizeBoundedVector(mPositionResolution, trkParam.NLayers, mMemoryPool.get()); clearResizeBoundedVector(mBogusClusters, trkParam.NLayers, mMemoryPool.get()); @@ -289,6 +305,17 @@ void TimeFrame::initialise(const TrackingParameters& trkParam, const in mMinR.fill(std::numeric_limits::max()); mMaxR.fill(std::numeric_limits::min()); } + clearResizeBoundedVector(mCells, mTrackingTopologyView.nCells, mMemoryPool.get()); + clearResizeBoundedVector(mCellsLookupTable, mTrackingTopologyView.nCells, mMemoryPool.get()); + clearResizeBoundedVector(mCellsNeighbours, mTrackingTopologyView.nCells, mMemoryPool.get()); + clearResizeBoundedVector(mCellsNeighboursTopology, mTrackingTopologyView.nCells, mMemoryPool.get()); + clearResizeBoundedVector(mCellsNeighboursLUT, mTrackingTopologyView.nCells, mMemoryPool.get()); + clearResizeBoundedVector(mCellLabels, mTrackingTopologyView.nCells, mMemoryPool.get()); + clearResizeBoundedVector(mTracklets, mTrackingTopologyView.nTransitions, mMemoryPool.get()); + clearResizeBoundedVector(mTrackletLabels, mTrackingTopologyView.nTransitions, mMemoryPool.get()); + clearResizeBoundedVector(mTrackletsLookupTable, mTrackingTopologyView.nTransitions, mMemoryPool.get()); + clearResizeBoundedVector(mTransitionPhiCuts, mTrackingTopologyView.nTransitions, mMemoryPool.get()); + clearResizeBoundedVector(mTransitionMSAngles, mTrackingTopologyView.nTransitions, mMemoryPool.get()); mNTrackletsPerROF.resize(2); for (auto& v : mNTrackletsPerROF) { v = bounded_vector(getNrof(1) + 1, 0, mMemoryPool.get()); @@ -304,42 +331,48 @@ void TimeFrame::initialise(const TrackingParameters& trkParam, const in } } - mMSangles.resize(trkParam.NLayers); - mPhiCuts.resize(mClusters.size() - 1, 0.f); - float oneOverR{0.001f * 0.3f * std::abs(mBz) / trkParam.TrackletMinPt}; + // estimate MS per layer + std::array msAngles{}; for (unsigned int iLayer{0}; iLayer < NLayers; ++iLayer) { - mMSangles[iLayer] = math_utils::MSangle(0.14f, trkParam.TrackletMinPt, trkParam.LayerxX0[iLayer]); + msAngles[iLayer] = math_utils::MSangle(0.14f, trkParam.TrackletMinPt, trkParam.LayerxX0[iLayer]); mPositionResolution[iLayer] = o2::gpu::CAMath::Sqrt((0.5f * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer])) + (trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer])); - if (iLayer < mClusters.size() - 1) { - const float& r1 = trkParam.LayerRadii[iLayer]; - const float& r2 = trkParam.LayerRadii[iLayer + 1]; - oneOverR = (0.5 * oneOverR >= 1.f / r2) ? (2.f / r2) - o2::constants::math::Almost0 : oneOverR; - const float res1 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[iLayer]); - const float res2 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[iLayer + 1]); - const float cosTheta1half = o2::gpu::CAMath::Sqrt(1.f - math_utils::Sq(0.5f * r1 * oneOverR)); - const float cosTheta2half = o2::gpu::CAMath::Sqrt(1.f - math_utils::Sq(0.5f * r2 * oneOverR)); - float x = (r2 * cosTheta1half) - (r1 * cosTheta2half); - float delta = o2::gpu::CAMath::Sqrt(1.f / (1.f - 0.25f * math_utils::Sq(x * oneOverR)) * (math_utils::Sq((0.25f * r1 * r2 * math_utils::Sq(oneOverR) / cosTheta2half) + cosTheta1half) * math_utils::Sq(res1) + math_utils::Sq((0.25f * r1 * r2 * math_utils::Sq(oneOverR) / cosTheta1half) + cosTheta2half) * math_utils::Sq(res2))); - /// the expression std::asin(0.5f * x * oneOverR) is equivalent to std::aCos(0.5f * r1 * oneOverR) - std::acos(0.5 * r2 * oneOverR) - mPhiCuts[iLayer] = std::min(o2::gpu::CAMath::ASin(0.5f * x * oneOverR) + 2.f * mMSangles[iLayer] + delta, o2::constants::math::PI * 0.5f); - } } - for (int iLayer{0}; iLayer < std::min((int)mTracklets.size(), maxLayers); ++iLayer) { - deepVectorClear(mTracklets[iLayer]); - deepVectorClear(mTrackletLabels[iLayer]); - if (iLayer < (int)mCells.size()) { - deepVectorClear(mCells[iLayer]); - deepVectorClear(mTrackletsLookupTable[iLayer]); - mTrackletsLookupTable[iLayer].resize(mClusters[iLayer + 1].size() + 1, 0); - deepVectorClear(mCellLabels[iLayer]); + // for each transition calculate the phi-cuts + integrated MS + float oneOverR{0.001f * 0.3f * std::abs(mBz) / trkParam.TrackletMinPt}; + for (int transitionId{0}; transitionId < (int)mTracklets.size(); ++transitionId) { + const auto& transition = mTrackingTopologyView.getTransition(transitionId); + float ms2 = 0.; + for (int layer = transition.fromLayer; layer < transition.toLayer; ++layer) { + ms2 += math_utils::Sq(msAngles[layer]); } + mTransitionMSAngles[transitionId] = o2::gpu::CAMath::Sqrt(ms2); + const float& r1 = trkParam.LayerRadii[transition.fromLayer]; + const float& r2 = trkParam.LayerRadii[transition.toLayer]; + oneOverR = (0.5 * oneOverR >= 1.f / r2) ? (2.f / r2) - o2::constants::math::Almost0 : oneOverR; + const float res1 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[transition.fromLayer]); + const float res2 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[transition.toLayer]); + const float cosTheta1half = o2::gpu::CAMath::Sqrt(1.f - math_utils::Sq(0.5f * r1 * oneOverR)); + const float cosTheta2half = o2::gpu::CAMath::Sqrt(1.f - math_utils::Sq(0.5f * r2 * oneOverR)); + float x = (r2 * cosTheta1half) - (r1 * cosTheta2half); + float delta = o2::gpu::CAMath::Sqrt(1.f / (1.f - 0.25f * math_utils::Sq(x * oneOverR)) * (math_utils::Sq((0.25f * r1 * r2 * math_utils::Sq(oneOverR) / cosTheta2half) + cosTheta1half) * math_utils::Sq(res1) + math_utils::Sq((0.25f * r1 * r2 * math_utils::Sq(oneOverR) / cosTheta1half) + cosTheta2half) * math_utils::Sq(res2))); + /// the expression std::asin(0.5f * x * oneOverR) is equivalent to std::aCos(0.5f * r1 * oneOverR) - std::acos(0.5 * r2 * oneOverR) + mTransitionPhiCuts[transitionId] = o2::gpu::CAMath::Min(o2::gpu::CAMath::ASin(0.5f * x * oneOverR) + 2.f * mTransitionMSAngles[transitionId] + delta, o2::constants::math::PI * 0.5f); + + // some cleanup + deepVectorClear(mTracklets[transitionId]); + deepVectorClear(mTrackletLabels[transitionId]); + deepVectorClear(mTrackletsLookupTable[transitionId]); + mTrackletsLookupTable[transitionId].resize(mClusters[transition.fromLayer].size() + 1, 0); + } - if (iLayer < (int)mCells.size() - 1) { - deepVectorClear(mCellsLookupTable[iLayer]); - deepVectorClear(mCellsNeighbours[iLayer]); - deepVectorClear(mCellsNeighboursLUT[iLayer]); - } + for (int cellId{0}; cellId < (int)mCells.size(); ++cellId) { + deepVectorClear(mCells[cellId]); + deepVectorClear(mCellsLookupTable[cellId]); + deepVectorClear(mCellsNeighbours[cellId]); + deepVectorClear(mCellsNeighboursTopology[cellId]); + deepVectorClear(mCellsNeighboursLUT[cellId]); + deepVectorClear(mCellLabels[cellId]); } } @@ -356,6 +389,9 @@ unsigned long TimeFrame::getArtefactsMemory() const for (const auto& cellsN : mCellsNeighbours) { size += sizeof(int) * cellsN.size(); } + for (const auto& cellsN : mCellsNeighboursTopology) { + size += sizeof(int) * cellsN.size(); + } return size; } @@ -401,8 +437,8 @@ void TimeFrame::setMemoryPool(std::shared_ptr po initContainers(mNTrackletsPerClusterSum); initContainers(mNClustersPerROF); initVector(mPrimaryVertices); - initVector(mMSangles); - initVector(mPhiCuts); + initVector(mTransitionPhiCuts); + initVector(mTransitionMSAngles); initVector(mPositionResolution); initContainers(mClusterSize); initVector(mPValphaX); @@ -442,6 +478,7 @@ void TimeFrame::wipe() deepVectorClear(mTracklets); deepVectorClear(mCells); deepVectorClear(mCellsNeighbours); + deepVectorClear(mCellsNeighboursTopology); deepVectorClear(mCellsLookupTable); deepVectorClear(mPrimaryVertices); deepVectorClear(mTrackletsLookupTable); @@ -449,8 +486,8 @@ void TimeFrame::wipe() deepVectorClear(mNTrackletsPerCluster); deepVectorClear(mNTrackletsPerClusterSum); deepVectorClear(mNClustersPerROF); - deepVectorClear(mMSangles); - deepVectorClear(mPhiCuts); + deepVectorClear(mTransitionPhiCuts); + deepVectorClear(mTransitionMSAngles); deepVectorClear(mPositionResolution); deepVectorClear(mClusterSize); deepVectorClear(mPValphaX); diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 19cae4b70f158..9fef067559e8a 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -15,25 +15,23 @@ #include #include -#include #include #include #include -#include +#include -#include "CommonConstants/MathConstants.h" #include "DetectorsBase/Propagator.h" #include "GPUCommonMath.h" #include "ITStracking/BoundedAllocator.h" #include "ITStracking/Cell.h" #include "ITStracking/Constants.h" #include "ITStracking/IndexTableUtils.h" +#include "ITStracking/LayerMask.h" #include "ITStracking/ROFLookupTables.h" #include "ITStracking/TrackerTraits.h" #include "ITStracking/TrackHelpers.h" #include "ITStracking/Tracklet.h" -#include "ReconstructionDataFormats/Track.h" namespace o2::its { @@ -47,23 +45,23 @@ struct PassMode { template void TrackerTraits::computeLayerTracklets(const int iteration, int iVertex) { - for (int iLayer = 0; iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { - mTimeFrame->getTracklets()[iLayer].clear(); - mTimeFrame->getTrackletsLabel(iLayer).clear(); - if (iLayer > 0) { - std::fill(mTimeFrame->getTrackletsLookupTable()[iLayer - 1].begin(), mTimeFrame->getTrackletsLookupTable()[iLayer - 1].end(), 0); - } + const auto topology = mTimeFrame->getTrackingTopologyView(); + for (int transitionId = 0; transitionId < topology.nTransitions; ++transitionId) { + mTimeFrame->getTracklets()[transitionId].clear(); + mTimeFrame->getTrackletsLabel(transitionId).clear(); + std::fill(mTimeFrame->getTrackletsLookupTable()[transitionId].begin(), mTimeFrame->getTrackletsLookupTable()[transitionId].end(), 0); } const Vertex diamondVert(mTrkParams[iteration].Diamond, mTrkParams[iteration].DiamondCov, 1, 1.f); gsl::span diamondSpan(&diamondVert, 1); mTaskArena->execute([&] { - auto forTracklets = [&](auto Tag, int iLayer, int pivotROF, int base, int& offset) -> int { - if (!mTimeFrame->getROFMaskView().isROFEnabled(iLayer, pivotROF)) { + auto forTracklets = [&](auto Tag, int transitionId, int pivotROF, int base, int& offset) -> int { + const auto& transition = topology.getTransition(transitionId); + if (!mTimeFrame->getROFMaskView().isROFEnabled(transition.fromLayer, pivotROF)) { return 0; } - gsl::span primaryVertices = mTrkParams[iteration].UseDiamond ? diamondSpan : mTimeFrame->getPrimaryVertices(iLayer, pivotROF); + gsl::span primaryVertices = mTrkParams[iteration].UseDiamond ? diamondSpan : mTimeFrame->getPrimaryVertices(transition.fromLayer, pivotROF); if (primaryVertices.empty()) { return 0; } @@ -73,46 +71,46 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer return 0; } - // does this layer have any overlap with the next layer - const auto& rofOverlap = mTimeFrame->getROFOverlapTableView().getOverlap(iLayer, iLayer + 1, pivotROF); + const auto& rofOverlap = mTimeFrame->getROFOverlapTableView().getOverlap(transition.fromLayer, transition.toLayer, pivotROF); if (!rofOverlap.getEntries()) { return 0; } int localCount = 0; - auto& tracklets = mTimeFrame->getTracklets()[iLayer]; - auto layer0 = mTimeFrame->getClustersOnLayer(pivotROF, iLayer); + auto& tracklets = mTimeFrame->getTracklets()[transitionId]; + auto layer0 = mTimeFrame->getClustersOnLayer(pivotROF, transition.fromLayer); if (layer0.empty()) { return 0; } - const float meanDeltaR = mTrkParams[iteration].LayerRadii[iLayer + 1] - mTrkParams[iteration].LayerRadii[iLayer]; + const float meanDeltaR = mTrkParams[iteration].LayerRadii[transition.toLayer] - mTrkParams[iteration].LayerRadii[transition.fromLayer]; + const float phiCut = mTimeFrame->getTransitionPhiCut(transitionId); + const float msAngle = mTimeFrame->getTransitionMSAngle(transitionId); for (int iCluster = 0; iCluster < int(layer0.size()); ++iCluster) { const Cluster& currentCluster = layer0[iCluster]; - const int currentSortedIndex = mTimeFrame->getSortedIndex(pivotROF, iLayer, iCluster); - if (mTimeFrame->isClusterUsed(iLayer, currentCluster.clusterId)) { + const int currentSortedIndex = mTimeFrame->getSortedIndex(pivotROF, transition.fromLayer, iCluster); + if (mTimeFrame->isClusterUsed(transition.fromLayer, currentCluster.clusterId)) { continue; } const float inverseR0 = 1.f / currentCluster.radius; for (int iV = startVtx; iV < endVtx; ++iV) { const auto& pv = primaryVertices[iV]; - if (!mTimeFrame->getROFVertexLookupTableView().isVertexCompatible(iLayer, pivotROF, pv)) { + if (!mTimeFrame->getROFVertexLookupTableView().isVertexCompatible(transition.fromLayer, pivotROF, pv)) { continue; } if (pv.isFlagSet(Vertex::Flags::UPCMode) != mTrkParams[iteration].PassFlags[IterationStep::SelectUPCVertices]) { continue; } - const float resolution = o2::gpu::CAMath::Sqrt(math_utils::Sq(mTimeFrame->getPositionResolution(iLayer)) + math_utils::Sq(mTrkParams[iteration].PVres) / float(pv.getNContributors())); + const float resolution = o2::gpu::CAMath::Sqrt(math_utils::Sq(mTimeFrame->getPositionResolution(transition.fromLayer)) + math_utils::Sq(mTrkParams[iteration].PVres) / float(pv.getNContributors())); const float tanLambda = (currentCluster.zCoordinate - pv.getZ()) * inverseR0; - const float zAtRmin = tanLambda * (mTimeFrame->getMinR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate; - const float zAtRmax = tanLambda * (mTimeFrame->getMaxR(iLayer + 1) - currentCluster.radius) + currentCluster.zCoordinate; + const float zAtRmin = tanLambda * (mTimeFrame->getMinR(transition.toLayer) - currentCluster.radius) + currentCluster.zCoordinate; + const float zAtRmax = tanLambda * (mTimeFrame->getMaxR(transition.toLayer) - currentCluster.radius) + currentCluster.zCoordinate; const float sqInvDeltaZ0 = 1.f / (math_utils::Sq(currentCluster.zCoordinate - pv.getZ()) + constants::Tolerance); - const float sigmaZ = o2::gpu::CAMath::Sqrt( - math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInvDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * mTimeFrame->getMSangle(iLayer))); - const auto bins = o2::its::getBinsRect(currentCluster, iLayer + 1, zAtRmin, zAtRmax, - sigmaZ * mTrkParams[iteration].NSigmaCut, mTimeFrame->getPhiCut(iLayer), + const float sigmaZ = o2::gpu::CAMath::Sqrt((math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInvDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f)) + math_utils::Sq(meanDeltaR * msAngle)); + const auto bins = o2::its::getBinsRect(currentCluster, transition.toLayer, zAtRmin, zAtRmax, + sigmaZ * mTrkParams[iteration].NSigmaCut, phiCut, mTimeFrame->getIndexTableUtils()); if (bins.x < 0) { continue; @@ -123,18 +121,18 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer } for (int targetROF = rofOverlap.getFirstEntry(); targetROF < rofOverlap.getEntriesBound(); ++targetROF) { - if (!mTimeFrame->getROFMaskView().isROFEnabled(iLayer + 1, targetROF)) { + if (!mTimeFrame->getROFMaskView().isROFEnabled(transition.toLayer, targetROF)) { continue; } - auto layer1 = mTimeFrame->getClustersOnLayer(targetROF, iLayer + 1); + auto layer1 = mTimeFrame->getClustersOnLayer(targetROF, transition.toLayer); if (layer1.empty()) { continue; } - const auto ts = mTimeFrame->getROFOverlapTableView().getTimeStamp(iLayer, pivotROF, iLayer + 1, targetROF); + const auto ts = mTimeFrame->getROFOverlapTableView().getTimeStamp(transition.fromLayer, pivotROF, transition.toLayer, targetROF); if (!ts.isCompatible(pv.getTimeStamp())) { continue; } - const auto& targetIndexTable = mTimeFrame->getIndexTable(targetROF, iLayer + 1); + const auto& targetIndexTable = mTimeFrame->getIndexTable(targetROF, transition.toLayer); const int zBinRange = (bins.z - bins.x) + 1; for (int iPhi = 0; iPhi < phiBinsNum; ++iPhi) { const int iPhiBin = (bins.y + iPhi) % mTrkParams[iteration].PhiBins; @@ -147,22 +145,22 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer break; } const Cluster& nextCluster = layer1[iNext]; - if (mTimeFrame->isClusterUsed(iLayer + 1, nextCluster.clusterId)) { + if (mTimeFrame->isClusterUsed(transition.toLayer, nextCluster.clusterId)) { continue; } const float deltaZ = o2::gpu::CAMath::Abs((tanLambda * (nextCluster.radius - currentCluster.radius)) + currentCluster.zCoordinate - nextCluster.zCoordinate); if (deltaZ / sigmaZ < mTrkParams[iteration].NSigmaCut && - math_utils::isPhiDifferenceBelow(currentCluster.phi, nextCluster.phi, mTimeFrame->getPhiCut(iLayer))) { + math_utils::isPhiDifferenceBelow(currentCluster.phi, nextCluster.phi, phiCut)) { const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)}; const float tanL = (currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius); if constexpr (decltype(Tag)::value == PassMode::OnePass::value) { - tracklets.emplace_back(currentSortedIndex, mTimeFrame->getSortedIndex(targetROF, iLayer + 1, iNext), tanL, phi, ts); + tracklets.emplace_back(currentSortedIndex, mTimeFrame->getSortedIndex(targetROF, transition.toLayer, iNext), tanL, phi, ts); } else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) { ++localCount; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) { const int idx = base + offset++; - tracklets[idx] = Tracklet(currentSortedIndex, mTimeFrame->getSortedIndex(targetROF, iLayer + 1, iNext), tanL, phi, ts); + tracklets[idx] = Tracklet(currentSortedIndex, mTimeFrame->getSortedIndex(targetROF, transition.toLayer, iNext), tanL, phi, ts); } } } @@ -175,22 +173,24 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer int dummy{0}; if (mTaskArena->max_concurrency() <= 1) { - for (int iLayer{0}; iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { - const int startROF = 0, endROF = mTimeFrame->getROFOverlapTableView().getLayer(iLayer).mNROFsTF; + for (int transitionId{0}; transitionId < topology.nTransitions; ++transitionId) { + const int fromLayer = topology.getTransition(transitionId).fromLayer; + const int startROF = 0, endROF = mTimeFrame->getROFOverlapTableView().getLayer(fromLayer).mNROFsTF; for (int pivotROF{startROF}; pivotROF < endROF; ++pivotROF) { - forTracklets(PassMode::OnePass{}, iLayer, pivotROF, 0, dummy); + forTracklets(PassMode::OnePass{}, transitionId, pivotROF, 0, dummy); } } } else { - tbb::parallel_for(0, mTrkParams[iteration].TrackletsPerRoad(), [&](const int iLayer) { - const int startROF = 0, endROF = mTimeFrame->getROFOverlapTableView().getLayer(iLayer).mNROFsTF; + tbb::parallel_for(0, static_cast(topology.nTransitions), [&](const int transitionId) { + const int fromLayer = topology.getTransition(transitionId).fromLayer; + const int startROF = 0, endROF = mTimeFrame->getROFOverlapTableView().getLayer(fromLayer).mNROFsTF; bounded_vector perROFCount((endROF - startROF) + 1, mMemoryPool.get()); tbb::parallel_for(startROF, endROF, [&](const int pivotROF) { - perROFCount[pivotROF - startROF] = forTracklets(PassMode::TwoPassCount{}, iLayer, pivotROF, 0, dummy); + perROFCount[pivotROF - startROF] = forTracklets(PassMode::TwoPassCount{}, transitionId, pivotROF, 0, dummy); }); std::exclusive_scan(perROFCount.begin(), perROFCount.end(), perROFCount.begin(), 0); const int nTracklets = perROFCount.back(); - mTimeFrame->getTracklets()[iLayer].resize(nTracklets); + mTimeFrame->getTracklets()[transitionId].resize(nTracklets); if (nTracklets == 0) { return; } @@ -200,38 +200,37 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer return; } int localIdx = 0; - forTracklets(PassMode::TwoPassInsert{}, iLayer, pivotROF, baseIdx, localIdx); + forTracklets(PassMode::TwoPassInsert{}, transitionId, pivotROF, baseIdx, localIdx); }); }); } - tbb::parallel_for(0, mTrkParams[iteration].TrackletsPerRoad(), [&](const int iLayer) { + tbb::parallel_for(0, static_cast(topology.nTransitions), [&](const int transitionId) { /// Sort tracklets & remove duplicates // duplicates can exist simply since we evaluate per vertex - auto& trkl{mTimeFrame->getTracklets()[iLayer]}; + auto& trkl{mTimeFrame->getTracklets()[transitionId]}; std::sort(trkl.begin(), trkl.end()); trkl.erase(std::unique(trkl.begin(), trkl.end()), trkl.end()); trkl.shrink_to_fit(); - if (iLayer > 0) { /// recalculate lut - auto& lut{mTimeFrame->getTrackletsLookupTable()[iLayer - 1]}; - if (!trkl.empty()) { - for (const auto& tkl : trkl) { - lut[tkl.firstClusterIndex + 1]++; - } - std::inclusive_scan(lut.begin(), lut.end(), lut.begin()); + auto& lut{mTimeFrame->getTrackletsLookupTable()[transitionId]}; + if (!trkl.empty()) { + for (const auto& tkl : trkl) { + lut[tkl.firstClusterIndex + 1]++; } + std::inclusive_scan(lut.begin(), lut.end(), lut.begin()); } }); /// Create tracklets labels if (mTimeFrame->hasMCinformation() && mTrkParams[iteration].CreateArtefactLabels) { - tbb::parallel_for(0, mTrkParams[iteration].TrackletsPerRoad(), [&](const int iLayer) { - for (auto& trk : mTimeFrame->getTracklets()[iLayer]) { + tbb::parallel_for(0, static_cast(topology.nTransitions), [&](const int transitionId) { + const auto& transition = topology.getTransition(transitionId); + for (auto& trk : mTimeFrame->getTracklets()[transitionId]) { MCCompLabel label; - int currentId{mTimeFrame->getClusters()[iLayer][trk.firstClusterIndex].clusterId}; - int nextId{mTimeFrame->getClusters()[iLayer + 1][trk.secondClusterIndex].clusterId}; - for (const auto& lab1 : mTimeFrame->getClusterLabels(iLayer, currentId)) { - for (const auto& lab2 : mTimeFrame->getClusterLabels(iLayer + 1, nextId)) { + int currentId{mTimeFrame->getClusters()[transition.fromLayer][trk.firstClusterIndex].clusterId}; + int nextId{mTimeFrame->getClusters()[transition.toLayer][trk.secondClusterIndex].clusterId}; + for (const auto& lab1 : mTimeFrame->getClusterLabels(transition.fromLayer, currentId)) { + for (const auto& lab2 : mTimeFrame->getClusterLabels(transition.toLayer, nextId)) { if (lab1 == lab2 && lab1.isValid()) { label = lab1; break; @@ -241,7 +240,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer break; } } - mTimeFrame->getTrackletsLabel(iLayer).emplace_back(label); + mTimeFrame->getTrackletsLabel(transitionId).emplace_back(label); } }); } @@ -251,26 +250,28 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iVer template void TrackerTraits::computeLayerCells(const int iteration) { - for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { - deepVectorClear(mTimeFrame->getCells()[iLayer]); - if (iLayer > 0) { - deepVectorClear(mTimeFrame->getCellsLookupTable()[iLayer - 1]); - } + const auto topology = mTimeFrame->getTrackingTopologyView(); + for (int cellTopologyId = 0; cellTopologyId < topology.nCells; ++cellTopologyId) { + deepVectorClear(mTimeFrame->getCells()[cellTopologyId]); + deepVectorClear(mTimeFrame->getCellsLookupTable()[cellTopologyId]); if (mTimeFrame->hasMCinformation() && mTrkParams[iteration].CreateArtefactLabels) { - deepVectorClear(mTimeFrame->getCellsLabel(iLayer)); + deepVectorClear(mTimeFrame->getCellsLabel(cellTopologyId)); } } mTaskArena->execute([&] { - auto forTrackletCells = [&](auto Tag, int iLayer, bounded_vector& layerCells, int iTracklet, int offset = 0) -> int { - const Tracklet& currentTracklet{mTimeFrame->getTracklets()[iLayer][iTracklet]}; + auto forTrackletCells = [&](auto Tag, int cellTopologyId, bounded_vector& layerCells, int iTracklet, int offset = 0) -> int { + const auto& cellTopology = topology.getCell(cellTopologyId); + const auto& firstTransition = topology.getTransition(cellTopology.firstTransition); + const auto& secondTransition = topology.getTransition(cellTopology.secondTransition); + const Tracklet& currentTracklet{mTimeFrame->getTracklets()[cellTopology.firstTransition][iTracklet]}; const int nextLayerClusterIndex{currentTracklet.secondClusterIndex}; - const int nextLayerFirstTrackletIndex{mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex]}; - const int nextLayerLastTrackletIndex{mTimeFrame->getTrackletsLookupTable()[iLayer][nextLayerClusterIndex + 1]}; + const int nextLayerFirstTrackletIndex{mTimeFrame->getTrackletsLookupTable()[cellTopology.secondTransition][nextLayerClusterIndex]}; + const int nextLayerLastTrackletIndex{mTimeFrame->getTrackletsLookupTable()[cellTopology.secondTransition][nextLayerClusterIndex + 1]}; int foundCells{0}; for (int iNextTracklet{nextLayerFirstTrackletIndex}; iNextTracklet < nextLayerLastTrackletIndex; ++iNextTracklet) { - const Tracklet& nextTracklet{mTimeFrame->getTracklets()[iLayer + 1][iNextTracklet]}; - if (mTimeFrame->getTracklets()[iLayer + 1][iNextTracklet].firstClusterIndex != nextLayerClusterIndex) { + const Tracklet& nextTracklet{mTimeFrame->getTracklets()[cellTopology.secondTransition][iNextTracklet]}; + if (nextTracklet.firstClusterIndex != nextLayerClusterIndex) { break; } if (!currentTracklet.getTimeStamp().isCompatible(nextTracklet.getTimeStamp())) { @@ -282,18 +283,20 @@ void TrackerTraits::computeLayerCells(const int iteration) /// Track seed preparation. Clusters are numbered progressively from the innermost going outward. const int clusId[3]{ - mTimeFrame->getClusters()[iLayer][currentTracklet.firstClusterIndex].clusterId, - mTimeFrame->getClusters()[iLayer + 1][nextTracklet.firstClusterIndex].clusterId, - mTimeFrame->getClusters()[iLayer + 2][nextTracklet.secondClusterIndex].clusterId}; - const auto& cluster1_glo = mTimeFrame->getUnsortedClusters()[iLayer][clusId[0]]; - const auto& cluster2_glo = mTimeFrame->getUnsortedClusters()[iLayer + 1][clusId[1]]; - const auto& cluster3_tf = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + 2)[clusId[2]]; + mTimeFrame->getClusters()[firstTransition.fromLayer][currentTracklet.firstClusterIndex].clusterId, + mTimeFrame->getClusters()[firstTransition.toLayer][nextTracklet.firstClusterIndex].clusterId, + mTimeFrame->getClusters()[secondTransition.toLayer][nextTracklet.secondClusterIndex].clusterId}; + const int hitLayers[3]{firstTransition.fromLayer, firstTransition.toLayer, secondTransition.toLayer}; + const auto& cluster1_glo = mTimeFrame->getUnsortedClusters()[firstTransition.fromLayer][clusId[0]]; + const auto& cluster2_glo = mTimeFrame->getUnsortedClusters()[firstTransition.toLayer][clusId[1]]; + const auto& cluster3_tf = mTimeFrame->getTrackingFrameInfoOnLayer(secondTransition.toLayer)[clusId[2]]; auto track{o2::its::track::buildTrackSeed(cluster1_glo, cluster2_glo, cluster3_tf, mBz)}; float chi2{0.f}; bool good{false}; for (int iC{2}; iC--;) { - const TrackingFrameInfo& trackingHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer + iC)[clusId[iC]]; + const int hitLayer = hitLayers[iC]; + const TrackingFrameInfo& trackingHit = mTimeFrame->getTrackingFrameInfoOnLayer(hitLayer)[clusId[iC]]; if (!track.rotate(trackingHit.alphaTrackingFrame)) { break; @@ -303,7 +306,7 @@ void TrackerTraits::computeLayerCells(const int iteration) break; } - if (!track.correctForMaterial(mTrkParams[iteration].LayerxX0[iLayer + iC], mTrkParams[iteration].LayerxX0[iLayer + iC] * constants::Radl * constants::Rho, true)) { + if (!track.correctForMaterial(mTrkParams[iteration].LayerxX0[hitLayer], mTrkParams[iteration].LayerxX0[hitLayer] * constants::Radl * constants::Rho, true)) { break; } @@ -323,12 +326,13 @@ void TrackerTraits::computeLayerCells(const int iteration) TimeEstBC ts = currentTracklet.getTimeStamp(); ts += nextTracklet.getTimeStamp(); if constexpr (decltype(Tag)::value == PassMode::OnePass::value) { - layerCells.emplace_back(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); + layerCells.emplace_back(cellTopology.hitLayerMask, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); ++foundCells; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) { ++foundCells; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) { - layerCells[offset++] = CellSeed(iLayer, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); + layerCells[offset++] = CellSeed(cellTopology.hitLayerMask, clusId[0], clusId[1], clusId[2], iTracklet, iNextTracklet, track, chi2, ts); + ++foundCells; } else { static_assert(false, "Unknown mode!"); } @@ -338,39 +342,32 @@ void TrackerTraits::computeLayerCells(const int iteration) return foundCells; }; - for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { - if (mTimeFrame->getTracklets()[iLayer + 1].empty() || - mTimeFrame->getTracklets()[iLayer].empty()) { - if (iLayer < mTrkParams[iteration].TrackletsPerRoad()) { - deepVectorClear(mTimeFrame->getTracklets()[iLayer]); - deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); - } + for (int cellTopologyId = 0; cellTopologyId < topology.nCells; ++cellTopologyId) { + const auto& cellTopology = topology.getCell(cellTopologyId); + if (mTimeFrame->getTracklets()[cellTopology.firstTransition].empty() || + mTimeFrame->getTracklets()[cellTopology.secondTransition].empty()) { continue; } - auto& layerCells = mTimeFrame->getCells()[iLayer]; - const int currentLayerTrackletsNum{static_cast(mTimeFrame->getTracklets()[iLayer].size())}; + auto& layerCells = mTimeFrame->getCells()[cellTopologyId]; + const int currentLayerTrackletsNum{static_cast(mTimeFrame->getTracklets()[cellTopology.firstTransition].size())}; bounded_vector perTrackletCount(currentLayerTrackletsNum + 1, 0, mMemoryPool.get()); if (mTaskArena->max_concurrency() <= 1) { for (int iTracklet{0}; iTracklet < currentLayerTrackletsNum; ++iTracklet) { - perTrackletCount[iTracklet] = forTrackletCells(PassMode::OnePass{}, iLayer, layerCells, iTracklet); + perTrackletCount[iTracklet] = forTrackletCells(PassMode::OnePass{}, cellTopologyId, layerCells, iTracklet); } std::exclusive_scan(perTrackletCount.begin(), perTrackletCount.end(), perTrackletCount.begin(), 0); } else { tbb::parallel_for(0, currentLayerTrackletsNum, [&](const int iTracklet) { - perTrackletCount[iTracklet] = forTrackletCells(PassMode::TwoPassCount{}, iLayer, layerCells, iTracklet); + perTrackletCount[iTracklet] = forTrackletCells(PassMode::TwoPassCount{}, cellTopologyId, layerCells, iTracklet); }); std::exclusive_scan(perTrackletCount.begin(), perTrackletCount.end(), perTrackletCount.begin(), 0); auto totalCells{perTrackletCount.back()}; if (totalCells == 0) { - if (iLayer > 0) { - auto& lut = mTimeFrame->getCellsLookupTable()[iLayer - 1]; - lut.resize(currentLayerTrackletsNum + 1); - std::fill(lut.begin(), lut.end(), 0); - } - deepVectorClear(mTimeFrame->getTracklets()[iLayer]); - deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); + auto& lut = mTimeFrame->getCellsLookupTable()[cellTopologyId]; + lut.resize(currentLayerTrackletsNum + 1); + std::fill(lut.begin(), lut.end(), 0); continue; } layerCells.resize(totalCells); @@ -380,181 +377,184 @@ void TrackerTraits::computeLayerCells(const int iteration) if (offset == perTrackletCount[iTracklet + 1]) { return; } - forTrackletCells(PassMode::TwoPassInsert{}, iLayer, layerCells, iTracklet, offset); + forTrackletCells(PassMode::TwoPassInsert{}, cellTopologyId, layerCells, iTracklet, offset); }); } - if (iLayer > 0) { - auto& lut = mTimeFrame->getCellsLookupTable()[iLayer - 1]; - lut.resize(currentLayerTrackletsNum + 1); - std::copy_n(perTrackletCount.begin(), currentLayerTrackletsNum + 1, lut.begin()); - } + auto& lut = mTimeFrame->getCellsLookupTable()[cellTopologyId]; + lut.resize(currentLayerTrackletsNum + 1); + std::copy_n(perTrackletCount.begin(), currentLayerTrackletsNum + 1, lut.begin()); if (mTimeFrame->hasMCinformation() && mTrkParams[iteration].CreateArtefactLabels) { - auto& labels = mTimeFrame->getCellsLabel(iLayer); + auto& labels = mTimeFrame->getCellsLabel(cellTopologyId); labels.reserve(layerCells.size()); for (const auto& cell : layerCells) { - MCCompLabel currentLab{mTimeFrame->getTrackletsLabel(iLayer)[cell.getFirstTrackletIndex()]}; - MCCompLabel nextLab{mTimeFrame->getTrackletsLabel(iLayer + 1)[cell.getSecondTrackletIndex()]}; + MCCompLabel currentLab{mTimeFrame->getTrackletsLabel(cellTopology.firstTransition)[cell.getFirstTrackletIndex()]}; + MCCompLabel nextLab{mTimeFrame->getTrackletsLabel(cellTopology.secondTransition)[cell.getSecondTrackletIndex()]}; labels.emplace_back(currentLab == nextLab ? currentLab : MCCompLabel()); } } - - // Once layer i cells are built and labelled, the corresponding tracklet artefacts are no longer needed. - deepVectorClear(mTimeFrame->getTracklets()[iLayer]); - deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); } }); - // Clear the trailing tracklet artefacts that are not consumed as the first leg of a cell. - for (int iLayer = mTrkParams[iteration].CellsPerRoad(); iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { - deepVectorClear(mTimeFrame->getTracklets()[iLayer]); - deepVectorClear(mTimeFrame->getTrackletsLabel(iLayer)); + for (int transitionId = 0; transitionId < topology.nTransitions; ++transitionId) { + deepVectorClear(mTimeFrame->getTracklets()[transitionId]); + deepVectorClear(mTimeFrame->getTrackletsLabel(transitionId)); } } template void TrackerTraits::findCellsNeighbours(const int iteration) { - struct Neighbor { - int cell{-1}, nextCell{-1}, level{-1}; - }; - + const auto topology = mTimeFrame->getTrackingTopologyView(); mTaskArena->execute([&] { - for (int iLayer{0}; iLayer < mTrkParams[iteration].NeighboursPerRoad(); ++iLayer) { - deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]); - deepVectorClear(mTimeFrame->getCellsNeighboursLUT()[iLayer]); - if (mTimeFrame->getCells()[iLayer + 1].empty() || - mTimeFrame->getCellsLookupTable()[iLayer].empty()) { - continue; - } + std::vector> cellsNeighboursByTarget; + cellsNeighboursByTarget.reserve(topology.nCells); + for (int cellTopologyId{0}; cellTopologyId < topology.nCells; ++cellTopologyId) { + deepVectorClear(mTimeFrame->getCellsNeighbours()[cellTopologyId]); + deepVectorClear(mTimeFrame->getCellsNeighboursTopology()[cellTopologyId]); + deepVectorClear(mTimeFrame->getCellsNeighboursLUT()[cellTopologyId]); + cellsNeighboursByTarget.emplace_back(mMemoryPool.get()); + } - int nCells{static_cast(mTimeFrame->getCells()[iLayer].size())}; - bounded_vector cellsNeighbours(mMemoryPool.get()); - - auto forCellNeighbour = [&](auto Tag, int iCell, int offset = 0) -> int { - const auto& currentCellSeed{mTimeFrame->getCells()[iLayer][iCell]}; - const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()}; - const int nextLayerFirstCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex]}; - const int nextLayerLastCellIndex{mTimeFrame->getCellsLookupTable()[iLayer][nextLayerTrackletIndex + 1]}; - int foundNextCells{0}; - for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) { - auto nextCellSeed{mTimeFrame->getCells()[iLayer + 1][iNextCell]}; /// copy - if (nextCellSeed.getFirstTrackletIndex() != nextLayerTrackletIndex || !currentCellSeed.getTimeStamp().isCompatible(nextCellSeed.getTimeStamp())) { - break; - } + for (int outerLayer{0}; outerLayer < NLayers; ++outerLayer) { + for (int cellTopologyId{0}; cellTopologyId < topology.nCells; ++cellTopologyId) { + const auto& cellTopology = topology.getCell(cellTopologyId); + if (cellTopology.hitLayerMask.last() != outerLayer || + mTimeFrame->getCells()[cellTopologyId].empty()) { + continue; + } + const auto successors = topology.getCellsStartingWithTransition(cellTopology.secondTransition); + if (!successors.getEntries()) { + continue; + } - if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) || - !nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) { - continue; - } + tbb::enumerable_thread_specific> sourceNeighbours([&]() { return bounded_vector{mMemoryPool.get()}; }); + tbb::parallel_for(0, static_cast(mTimeFrame->getCells()[cellTopologyId].size()), [&](const int iCell) { + auto& localNeighbours = sourceNeighbours.local(); + const auto& currentCellSeed{mTimeFrame->getCells()[cellTopologyId][iCell]}; + const int nextLayerTrackletIndex{currentCellSeed.getSecondTrackletIndex()}; + for (int iSuccessor{0}; iSuccessor < successors.getEntries(); ++iSuccessor) { + const int nextCellTopologyId = topology.cellsByFirstTransition[successors.getFirstEntry() + iSuccessor]; + if (mTimeFrame->getCells()[nextCellTopologyId].empty() || + mTimeFrame->getCellsLookupTable()[nextCellTopologyId].empty()) { + continue; + } + const auto& nextCellLUT = mTimeFrame->getCellsLookupTable()[nextCellTopologyId]; + if (nextLayerTrackletIndex + 1 >= static_cast(nextCellLUT.size())) { + continue; + } + const int nextLayerFirstCellIndex{nextCellLUT[nextLayerTrackletIndex]}; + const int nextLayerLastCellIndex{nextCellLUT[nextLayerTrackletIndex + 1]}; + for (int iNextCell{nextLayerFirstCellIndex}; iNextCell < nextLayerLastCellIndex; ++iNextCell) { + const auto& nextCellSeedRef{mTimeFrame->getCells()[nextCellTopologyId][iNextCell]}; + if (nextCellSeedRef.getFirstTrackletIndex() != nextLayerTrackletIndex || !currentCellSeed.getTimeStamp().isCompatible(nextCellSeedRef.getTimeStamp())) { + break; + } - float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); /// TODO: switch to the chi2 wrt cluster to avoid correlation - if (chi2 > mTrkParams[iteration].MaxChi2ClusterAttachment) { - continue; - } + auto nextCellSeed{mTimeFrame->getCells()[nextCellTopologyId][iNextCell]}; /// copy + if (!nextCellSeed.rotate(currentCellSeed.getAlpha()) || + !nextCellSeed.propagateTo(currentCellSeed.getX(), getBz())) { + continue; + } - if constexpr (decltype(Tag)::value == PassMode::OnePass::value) { - cellsNeighbours.emplace_back(iCell, iNextCell, currentCellSeed.getLevel() + 1); - } else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) { - ++foundNextCells; - } else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) { - cellsNeighbours[offset++] = {iCell, iNextCell, currentCellSeed.getLevel() + 1}; - } else { - static_assert(false, "Unknown mode!"); - } - } - return foundNextCells; - }; + float chi2 = currentCellSeed.getPredictedChi2(nextCellSeed); + if (chi2 > mTrkParams[iteration].MaxChi2ClusterAttachment) { + continue; + } - if (mTaskArena->max_concurrency() <= 1) { - for (int iCell{0}; iCell < nCells; ++iCell) { - forCellNeighbour(PassMode::OnePass{}, iCell); - } - } else { - bounded_vector perCellCount(nCells + 1, 0, mMemoryPool.get()); - tbb::parallel_for(0, nCells, [&](const int iCell) { - perCellCount[iCell] = forCellNeighbour(PassMode::TwoPassCount{}, iCell); + const int nextLevel = currentCellSeed.getLevel() + 1; + localNeighbours.emplace_back(cellTopologyId, iCell, nextCellTopologyId, iNextCell, nextLevel); + } + } }); - std::exclusive_scan(perCellCount.begin(), perCellCount.end(), perCellCount.begin(), 0); - int totalCellNeighbours = perCellCount.back(); - if (totalCellNeighbours == 0) { - deepVectorClear(mTimeFrame->getCellsNeighbours()[iLayer]); - continue; + bounded_vector count(topology.nCells, 0, mMemoryPool.get()); + for (const auto& localNeighbours : sourceNeighbours) { + for (const auto& neigh : localNeighbours) { + ++count[neigh.nextCellTopology]; + } } - cellsNeighbours.resize(totalCellNeighbours); - - tbb::parallel_for(0, nCells, [&](const int iCell) { - int offset = perCellCount[iCell]; - if (offset == perCellCount[iCell + 1]) { - return; + for (size_t i{0}; i < topology.nCells; ++i) { + cellsNeighboursByTarget[i].reserve(count[i]); + } + for (const auto& localNeighbours : sourceNeighbours) { + for (const auto& neigh : localNeighbours) { + cellsNeighboursByTarget[neigh.nextCellTopology].emplace_back(neigh); + if (neigh.level > mTimeFrame->getCells()[neigh.nextCellTopology][neigh.nextCell].getLevel()) { + mTimeFrame->getCells()[neigh.nextCellTopology][neigh.nextCell].setLevel(neigh.level); + } } - forCellNeighbour(PassMode::TwoPassInsert{}, iCell, offset); - }); + } } + } + for (int cellTopologyId{0}; cellTopologyId < topology.nCells; ++cellTopologyId) { + auto& cellsNeighbours = cellsNeighboursByTarget[cellTopologyId]; if (cellsNeighbours.empty()) { continue; } - tbb::parallel_sort(cellsNeighbours.begin(), cellsNeighbours.end(), [](const auto& a, const auto& b) { + std::sort(cellsNeighbours.begin(), cellsNeighbours.end(), [](const auto& a, const auto& b) { return a.nextCell < b.nextCell; }); - auto& cellsNeighbourLUT = mTimeFrame->getCellsNeighboursLUT()[iLayer]; - cellsNeighbourLUT.assign(mTimeFrame->getCells()[iLayer + 1].size(), 0); + auto& cellsNeighbourLUT = mTimeFrame->getCellsNeighboursLUT()[cellTopologyId]; + cellsNeighbourLUT.assign(mTimeFrame->getCells()[cellTopologyId].size(), 0); for (const auto& neigh : cellsNeighbours) { ++cellsNeighbourLUT[neigh.nextCell]; } std::inclusive_scan(cellsNeighbourLUT.begin(), cellsNeighbourLUT.end(), cellsNeighbourLUT.begin()); - mTimeFrame->getCellsNeighbours()[iLayer].reserve(cellsNeighbours.size()); - std::ranges::transform(cellsNeighbours, std::back_inserter(mTimeFrame->getCellsNeighbours()[iLayer]), [](const auto& neigh) { return neigh.cell; }); - - for (auto it = cellsNeighbours.begin(); it != cellsNeighbours.end();) { - int cellIdx = it->nextCell; - int maxLvl = it->level; - while (++it != cellsNeighbours.end() && it->nextCell == cellIdx) { - maxLvl = std::max(maxLvl, it->level); - } - mTimeFrame->getCells()[iLayer + 1][cellIdx].setLevel(maxLvl); - } + mTimeFrame->getCellsNeighbours()[cellTopologyId].reserve(cellsNeighbours.size()); + mTimeFrame->getCellsNeighboursTopology()[cellTopologyId].reserve(cellsNeighbours.size()); + std::ranges::transform(cellsNeighbours, std::back_inserter(mTimeFrame->getCellsNeighbours()[cellTopologyId]), [](const auto& neigh) { return neigh.cell; }); + std::ranges::transform(cellsNeighbours, std::back_inserter(mTimeFrame->getCellsNeighboursTopology()[cellTopologyId]), [](const auto& neigh) { return neigh.cellTopology; }); + } - // clear cells LUT - deepVectorClear(mTimeFrame->getCellsLookupTable()[iLayer]); + // clean up LUTs + for (auto& cellLUT : mTimeFrame->getCellsLookupTable()) { + deepVectorClear(cellLUT); } }); } template template -void TrackerTraits::processNeighbours(int iteration, int iLayer, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds) +void TrackerTraits::processNeighbours(int iteration, int defaultCellTopologyId, int iLevel, const bounded_vector& currentCellSeed, const bounded_vector& currentCellId, const bounded_vector& currentCellTopologyId, bounded_vector& updatedCellSeeds, bounded_vector& updatedCellsIds, bounded_vector& updatedCellsTopologyIds) { auto propagator = o2::base::Propagator::Instance(); mTaskArena->execute([&] { auto forCellNeighbours = [&](auto Tag, int iCell, int offset = 0) -> int { const auto& currentCell{currentCellSeed[iCell]}; + const int cellTopologyId = currentCellTopologyId.empty() ? defaultCellTopologyId : currentCellTopologyId[iCell]; if constexpr (decltype(Tag)::value != PassMode::TwoPassInsert::value) { if (currentCell.getLevel() != iLevel) { return 0; } - if (currentCellId.empty() && (mTimeFrame->isClusterUsed(iLayer, currentCell.getFirstClusterIndex()) || - mTimeFrame->isClusterUsed(iLayer + 1, currentCell.getSecondClusterIndex()) || - mTimeFrame->isClusterUsed(iLayer + 2, currentCell.getThirdClusterIndex()))) { - return 0; /// this we do only on the first iteration, hence the check on currentCellId + if (currentCellId.empty()) { + for (int layer = 0; layer < NLayers; ++layer) { + const int clusterIndex = currentCell.getCluster(layer); + if (clusterIndex != constants::UnusedIndex && mTimeFrame->isClusterUsed(layer, clusterIndex)) { + return 0; /// this we do only on the first iteration, hence the check on currentCellId + } + } } } const int cellId = currentCellId.empty() ? iCell : currentCellId[iCell]; - const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId - 1] : 0}; - const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[iLayer - 1][cellId]}; + if (cellTopologyId < 0 || mTimeFrame->getCellsNeighboursLUT()[cellTopologyId].empty()) { + return 0; + } + const int startNeighbourId{cellId ? mTimeFrame->getCellsNeighboursLUT()[cellTopologyId][cellId - 1] : 0}; + const int endNeighbourId{mTimeFrame->getCellsNeighboursLUT()[cellTopologyId][cellId]}; int foundSeeds{0}; for (int iNeighbourCell{startNeighbourId}; iNeighbourCell < endNeighbourId; ++iNeighbourCell) { - const int neighbourCellId = mTimeFrame->getCellsNeighbours()[iLayer - 1][iNeighbourCell]; - const auto& neighbourCell = mTimeFrame->getCells()[iLayer - 1][neighbourCellId]; + const int neighbourCellTopologyId = mTimeFrame->getCellsNeighboursTopology()[cellTopologyId][iNeighbourCell]; + const int neighbourCellId = mTimeFrame->getCellsNeighbours()[cellTopologyId][iNeighbourCell]; + const auto& neighbourCell = mTimeFrame->getCells()[neighbourCellTopologyId][neighbourCellId]; if (neighbourCell.getSecondTrackletIndex() != currentCell.getFirstTrackletIndex()) { continue; } @@ -564,7 +564,9 @@ void TrackerTraits::processNeighbours(int iteration, int iLayer, int iL if (currentCell.getLevel() - 1 != neighbourCell.getLevel()) { continue; } - if (mTimeFrame->isClusterUsed(iLayer - 1, neighbourCell.getFirstClusterIndex())) { + const int neighbourLayer = neighbourCell.getInnerLayer(); + const int neighbourCluster = neighbourCell.getFirstClusterIndex(); + if (mTimeFrame->isClusterUsed(neighbourLayer, neighbourCluster)) { continue; } @@ -572,7 +574,7 @@ void TrackerTraits::processNeighbours(int iteration, int iLayer, int iL TrackSeedN seed{currentCell}; seed.getTimeStamp() = currentCell.getTimeStamp(); seed.getTimeStamp() += neighbourCell.getTimeStamp(); - const auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer - 1)[neighbourCell.getFirstClusterIndex()]; + const auto& trHit = mTimeFrame->getTrackingFrameInfoOnLayer(neighbourLayer)[neighbourCluster]; if (!seed.rotate(trHit.alphaTrackingFrame)) { continue; @@ -583,7 +585,7 @@ void TrackerTraits::processNeighbours(int iteration, int iLayer, int iL } if (mTrkParams[iteration].CorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { - if (!seed.correctForMaterial(mTrkParams[iteration].LayerxX0[iLayer - 1], mTrkParams[iteration].LayerxX0[iLayer - 1] * constants::Radl * constants::Rho, true)) { + if (!seed.correctForMaterial(mTrkParams[iteration].LayerxX0[neighbourLayer], mTrkParams[iteration].LayerxX0[neighbourLayer] * constants::Radl * constants::Rho, true)) { continue; } } @@ -598,7 +600,10 @@ void TrackerTraits::processNeighbours(int iteration, int iLayer, int iL } if constexpr (decltype(Tag)::value != PassMode::TwoPassCount::value) { - seed.getClusters()[iLayer - 1] = neighbourCell.getFirstClusterIndex(); + seed.getClusters()[neighbourLayer] = neighbourCluster; + auto mask = seed.getHitLayerMask(); + mask.set(neighbourLayer); + seed.setHitLayerMask(mask); seed.setLevel(neighbourCell.getLevel()); seed.setFirstTrackletIndex(neighbourCell.getFirstTrackletIndex()); seed.setSecondTrackletIndex(neighbourCell.getSecondTrackletIndex()); @@ -607,11 +612,13 @@ void TrackerTraits::processNeighbours(int iteration, int iLayer, int iL if constexpr (decltype(Tag)::value == PassMode::OnePass::value) { updatedCellSeeds.push_back(seed); updatedCellsIds.push_back(neighbourCellId); + updatedCellsTopologyIds.push_back(neighbourCellTopologyId); } else if constexpr (decltype(Tag)::value == PassMode::TwoPassCount::value) { ++foundSeeds; } else if constexpr (decltype(Tag)::value == PassMode::TwoPassInsert::value) { updatedCellSeeds[offset] = seed; - updatedCellsIds[offset++] = neighbourCellId; + updatedCellsIds[offset] = neighbourCellId; + updatedCellsTopologyIds[offset++] = neighbourCellTopologyId; } else { static_assert(false, "Unknown mode!"); } @@ -637,6 +644,7 @@ void TrackerTraits::processNeighbours(int iteration, int iLayer, int iL } updatedCellSeeds.resize(totalNeighbours); updatedCellsIds.resize(totalNeighbours); + updatedCellsTopologyIds.resize(totalNeighbours); tbb::parallel_for(0, nCells, [&](const int iCell) { int offset = perCellCount[iCell]; @@ -663,33 +671,42 @@ void TrackerTraits::findRoads(const int iteration) tfInfos[iLayer] = mTimeFrame->getTrackingFrameInfoOnLayer(iLayer).data(); unsortedClusters[iLayer] = mTimeFrame->getUnsortedClusters()[iLayer].data(); } + const auto topology = mTimeFrame->getTrackingTopologyView(); for (int startLevel{mTrkParams[iteration].CellsPerRoad()}; startLevel >= mTrkParams[iteration].CellMinimumLevel(); --startLevel) { auto seedFilter = [&](const auto& seed) { - return seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[iteration].MaxChi2NDF * ((startLevel + 2) * 2 - 5); + return seed.getHitLayerMask().isAllowed(mTrkParams[iteration].MaxHoles, mTrkParams[iteration].HoleLayerMask) && + seed.getHitLayerMask().length() >= mTrkParams[iteration].MinTrackLength && + seed.getQ2Pt() <= 1.e3 && seed.getChi2() <= mTrkParams[iteration].MaxChi2NDF * ((startLevel + 2) * 2 - 5); }; bounded_vector trackSeeds(mMemoryPool.get()); - for (int startLayer{mTrkParams[iteration].NeighboursPerRoad()}; startLayer >= startLevel - 1; --startLayer) { - if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { + for (int startCellTopologyId{0}; startCellTopologyId < topology.nCells; ++startCellTopologyId) { + const int startLayer = topology.getCell(startCellTopologyId).hitLayerMask.last(); + if ((mTrkParams[iteration].StartLayerMask & (1 << startLayer)) == 0 || + mTimeFrame->getCells()[startCellTopologyId].empty()) { continue; } bounded_vector lastCellId(mMemoryPool.get()), updatedCellId(mMemoryPool.get()); + bounded_vector lastCellTopologyId(mMemoryPool.get()), updatedCellTopologyId(mMemoryPool.get()); bounded_vector lastCellSeed(mMemoryPool.get()), updatedCellSeed(mMemoryPool.get()); - processNeighbours(iteration, startLayer, startLevel, mTimeFrame->getCells()[startLayer], lastCellId, updatedCellSeed, updatedCellId); + processNeighbours(iteration, startCellTopologyId, startLevel, mTimeFrame->getCells()[startCellTopologyId], lastCellId, lastCellTopologyId, updatedCellSeed, updatedCellId, updatedCellTopologyId); int level = startLevel; - for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { + while (level > 2 && !updatedCellSeed.empty()) { lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); + lastCellTopologyId.swap(updatedCellTopologyId); deepVectorClear(updatedCellSeed); /// tame the memory peaks deepVectorClear(updatedCellId); /// tame the memory peaks - processNeighbours(iteration, iLayer, --level, lastCellSeed, lastCellId, updatedCellSeed, updatedCellId); + deepVectorClear(updatedCellTopologyId); + processNeighbours(iteration, constants::UnusedIndex, --level, lastCellSeed, lastCellId, lastCellTopologyId, updatedCellSeed, updatedCellId, updatedCellTopologyId); } - deepVectorClear(lastCellId); /// tame the memory peaks - deepVectorClear(lastCellSeed); /// tame the memory peaks + deepVectorClear(lastCellId); /// tame the memory peaks + deepVectorClear(lastCellTopologyId); /// tame the memory peaks + deepVectorClear(lastCellSeed); /// tame the memory peaks if (!updatedCellSeed.empty()) { trackSeeds.reserve(trackSeeds.size() + std::count_if(updatedCellSeed.begin(), updatedCellSeed.end(), seedFilter)); @@ -767,7 +784,7 @@ void TrackerTraits::findRoads(const int iteration) }); std::sort(tracks.begin(), tracks.end(), [](const auto& a, const auto& b) { - return a.getChi2() < b.getChi2(); + return track::isBetter(a, b); }); acceptTracks(iteration, tracks, firstClusters, sharedFirstClusters); @@ -778,6 +795,8 @@ void TrackerTraits::findRoads(const int iteration) template void TrackerTraits::acceptTracks(int iteration, bounded_vector& tracks, bounded_vector>& firstClusters, bounded_vector>& sharedFirstClusters) { + auto& trks = mTimeFrame->getTracks(); + trks.reserve(trks.size() + tracks.size()); const float smallestROFHalf = mTimeFrame->getROFOverlapTableView().getClockLayer().mROFLength * 0.5f; for (auto& track : tracks) { int nShared = 0; @@ -837,7 +856,7 @@ void TrackerTraits::acceptTracks(int iteration, bounded_vectorgetTracks().emplace_back(track); + trks.emplace_back(track); if (mTrkParams[iteration].AllowSharingFirstCluster) { firstClusters[firstLayer].push_back(firstCluster); @@ -897,13 +916,13 @@ void TrackerTraits::setNThreads(int n, std::shared_ptr } template class TrackerTraits<7>; -template void TrackerTraits<7>::processNeighbours(int, int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); -template void TrackerTraits<7>::processNeighbours>(int, int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<7>::processNeighbours(int, int, int, const bounded_vector&, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&, bounded_vector&); +template void TrackerTraits<7>::processNeighbours>(int, int, int, const bounded_vector>&, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&, bounded_vector&); // ALICE3 upgrade #ifdef ENABLE_UPGRADES template class TrackerTraits<11>; -template void TrackerTraits<11>::processNeighbours(int, int, int, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&); -template void TrackerTraits<11>::processNeighbours>(int, int, int, const bounded_vector>&, const bounded_vector&, bounded_vector>&, bounded_vector&); +template void TrackerTraits<11>::processNeighbours(int, int, int, const bounded_vector&, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&, bounded_vector&); +template void TrackerTraits<11>::processNeighbours>(int, int, int, const bounded_vector>&, const bounded_vector&, const bounded_vector&, bounded_vector>&, bounded_vector&, bounded_vector&); #endif } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index f745d671419af..7f10419d63fea 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -54,6 +54,12 @@ void ITSTrackingInterface::initialise() LOGP(info, "Initializing tracker in {} phase reconstruction with {} passes for tracking and {}/{} for vertexing", TrackingMode::toString(mMode), trackParams.size(), o2::its::VertexerParamConfig::Instance().nIterations, vertParams.size()); mTracker->setParameters(trackParams); mVertexer->setParameters(vertParams); + TrackingParameters vertexTrackingParams; + mTimeFrame->initVertexingTopology(vertexTrackingParams); + if (!trackParams.empty()) { + mTimeFrame->initDefaultTrackingTopology(trackParams[0], NLayers); + mTimeFrame->initTrackerTopologies(gsl::span(trackParams.data(), trackParams.size())); + } if (mMode == TrackingMode::Cosmics) { mRunVertexer = false; diff --git a/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt index 063583b4cfa1b..f8fce10b78602 100644 --- a/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/test/CMakeLists.txt @@ -20,3 +20,9 @@ o2_add_test(roflookuptables COMPONENT_NAME its-tracking LABELS "its;tracking" PUBLIC_LINK_LIBRARIES O2::ITStracking) + +o2_add_test(trackingtopology + SOURCES testTrackingTopology.cxx + COMPONENT_NAME its-tracking + LABELS "its;tracking" + PUBLIC_LINK_LIBRARIES O2::ITStracking) diff --git a/Detectors/ITSMFT/ITS/tracking/test/testROFLookupTables.cxx b/Detectors/ITSMFT/ITS/tracking/test/testROFLookupTables.cxx index dd98a75efca7c..9626e42efd547 100644 --- a/Detectors/ITSMFT/ITS/tracking/test/testROFLookupTables.cxx +++ b/Detectors/ITSMFT/ITS/tracking/test/testROFLookupTables.cxx @@ -559,6 +559,7 @@ BOOST_AUTO_TEST_CASE(rofvertex_basic) vertices.push_back(vert1); table.update(vertices.data(), vertices.size()); const auto view = table.getView(); + view.printAll(); } BOOST_AUTO_TEST_CASE(rofvertex_init_with_vertices) diff --git a/Detectors/ITSMFT/ITS/tracking/test/testTrackingTopology.cxx b/Detectors/ITSMFT/ITS/tracking/test/testTrackingTopology.cxx new file mode 100644 index 0000000000000..4944d00b15fea --- /dev/null +++ b/Detectors/ITSMFT/ITS/tracking/test/testTrackingTopology.cxx @@ -0,0 +1,119 @@ +// Copyright 2019-2026 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include +#define BOOST_TEST_MODULE ITS TrackingTopology +#define BOOST_TEST_MAIN +#define BOOST_TEST_DYN_LINK + +#include +#include "ITStracking/TrackingTopology.h" + +/// -------- Tests -------- +BOOST_AUTO_TEST_CASE(layermask_holes_and_length) +{ + using o2::its::LayerMask; + + const LayerMask layer3Hole{0x77}; // layers 0,1,2,4,5,6 + BOOST_CHECK_EQUAL(layer3Hole.count(), 6); + BOOST_CHECK_EQUAL(layer3Hole.length(), 7); + BOOST_CHECK_EQUAL(layer3Hole.holeMask().value(), 0x08); + BOOST_CHECK(layer3Hole.isAllowed(1, 0x08)); + BOOST_CHECK(!layer3Hole.isAllowed(0, 0x08)); + + const LayerMask missingLeadingLayer0{0x7e}; // layers 1..6 + BOOST_CHECK_EQUAL(missingLeadingLayer0.count(), 6); + BOOST_CHECK_EQUAL(missingLeadingLayer0.length(), 6); + BOOST_CHECK_EQUAL(missingLeadingLayer0.holeMask().value(), 0x00); + + const LayerMask missingTrailingLayer6{0x3f}; // layers 0..5 + BOOST_CHECK_EQUAL(missingTrailingLayer6.count(), 6); + BOOST_CHECK_EQUAL(missingTrailingLayer6.length(), 6); + BOOST_CHECK_EQUAL(missingTrailingLayer6.holeMask().value(), 0x00); +} + +BOOST_AUTO_TEST_CASE(layermask_topological_length_counts_internal_holes) +{ + using o2::its::LayerMask; + + BOOST_CHECK_GE(LayerMask{0x7f}.length(), 7); // 7 clusters + BOOST_CHECK_GE(LayerMask{0x77}.length(), 7); // 6 clusters + layer-3 hole + BOOST_CHECK_LT(LayerMask{0x7e}.length(), 7); // missing leading layer + BOOST_CHECK_LT(LayerMask{0x3f}.length(), 7); // missing trailing layer +} + +BOOST_AUTO_TEST_CASE(trackingtopology_basic) +{ + o2::its::TrackingTopology<4> topo; + topo.init(4, 0, 0); + const auto view = topo.getView(); + view.print(); + + BOOST_CHECK_EQUAL(view.nTransitions, 3); + for (int i{0}; i < 3; ++i) { + const auto& tra = view.getTransition(i); + BOOST_CHECK_EQUAL(tra.fromLayer, i); + BOOST_CHECK_EQUAL(tra.toLayer, i + 1); + } + + BOOST_CHECK_EQUAL(view.nCells, 2); + for (int i{0}; i < 2; ++i) { + const auto& cell = view.getCell(i); + BOOST_CHECK_EQUAL(cell.firstTransition, i); + BOOST_CHECK_EQUAL(cell.secondTransition, i + 1); + } +} + +BOOST_AUTO_TEST_CASE(trackingtopology_single_allowed_hole) +{ + o2::its::TrackingTopology<5> topo; + topo.init(5, 1, 1 << 2); + const auto view = topo.getView(); + view.print(); + + BOOST_CHECK_EQUAL(view.nTransitions, 5); + BOOST_CHECK_EQUAL(view.nCells, 5); + + bool hasHoleTransition = false; + for (int i{0}; i < view.nTransitions; ++i) { + const auto& transition = view.getTransition(i); + hasHoleTransition |= transition.fromLayer == 1 && transition.toLayer == 3; + BOOST_CHECK(o2::its::LayerMask::skipped(transition.fromLayer, transition.toLayer).isAllowedHoleMask(1, 1 << 2)); + } + BOOST_CHECK(hasHoleTransition); + + bool hasHoleCell = false; + for (int i{0}; i < view.nCells; ++i) { + const auto& cell = view.getCell(i); + hasHoleCell |= cell.hitLayerMask.value() == 0x0b; // layers 0,1,3 + BOOST_CHECK(cell.hitLayerMask.isAllowed(1, 1 << 2)); + } + BOOST_CHECK(hasHoleCell); +} + +BOOST_AUTO_TEST_CASE(trackingtopology_rejects_wrong_hole_layer) +{ + o2::its::TrackingTopology<5> topo; + topo.init(5, 1, 1 << 2); + const auto view = topo.getView(); + view.print(); + + for (int i{0}; i < view.nTransitions; ++i) { + const auto& transition = view.getTransition(i); + BOOST_CHECK(!(transition.fromLayer == 0 && transition.toLayer == 2)); + BOOST_CHECK(!(transition.fromLayer == 2 && transition.toLayer == 4)); + } + + for (int i{0}; i < view.nCells; ++i) { + const auto& cell = view.getCell(i); + BOOST_CHECK(cell.hitLayerMask.holeMask().isSubsetOf(1 << 2)); + } +}