Skip to content

Commit df683a6

Browse files
authored
perf(dora): optimize change lead time calculation using batch queries (#8714)
* perf(dora): optimize change lead time calculation using batch queries Eliminates N+1 query problem by implementing batch fetching for: - First commits per PR (batchFetchFirstCommits) - First reviews per PR (batchFetchFirstReviews) - Deployments per project (batchFetchDeployments) Performance improvement: - Before: 3 queries per PR (30,001 queries for 10K PRs) - After: 3 batch queries total (99.99% reduction) Also fixes NULL author_id handling in review queries to properly handle PRs with empty author_id field. Tested with E2E tests confirming correctness and performance gains. Signed-off-by: Ankesh <athakur@g2.com> * fix(dora): remove obsolete single-query functions replaced by batch queries Signed-off-by: Ankesh <athakur@g2.com> * ci: add workflow to build and push amd64 image to GHCR Signed-off-by: Ankesh <athakur@g2.com> * revert: remove GHCR build workflow Signed-off-by: Ankesh <athakur@g2.com> --------- Signed-off-by: Ankesh <athakur@g2.com>
1 parent e2f4a7a commit df683a6

1 file changed

Lines changed: 157 additions & 80 deletions

File tree

backend/plugins/dora/tasks/change_lead_time_calculator.go

Lines changed: 157 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,31 @@ func CalculateChangeLeadTime(taskCtx plugin.SubTaskContext) errors.Error {
5252
return errors.Default.Wrap(err, "error deleting previous project_pr_metrics")
5353
}
5454

55+
// Batch fetch all required data upfront for better performance
56+
startTime := time.Now()
57+
logger.Info("Batch fetching data for project: %s", data.Options.ProjectName)
58+
59+
firstCommitsMap, err := batchFetchFirstCommits(data.Options.ProjectName, db)
60+
if err != nil {
61+
return errors.Default.Wrap(err, "failed to batch fetch first commits")
62+
}
63+
logger.Info("Fetched %d first commits in %v", len(firstCommitsMap), time.Since(startTime))
64+
65+
reviewStartTime := time.Now()
66+
firstReviewsMap, err := batchFetchFirstReviews(data.Options.ProjectName, db)
67+
if err != nil {
68+
return errors.Default.Wrap(err, "failed to batch fetch first reviews")
69+
}
70+
logger.Info("Fetched %d first reviews in %v", len(firstReviewsMap), time.Since(reviewStartTime))
71+
72+
deploymentStartTime := time.Now()
73+
deploymentsMap, err := batchFetchDeployments(data.Options.ProjectName, db)
74+
if err != nil {
75+
return errors.Default.Wrap(err, "failed to batch fetch deployments")
76+
}
77+
logger.Info("Fetched %d deployments in %v", len(deploymentsMap), time.Since(deploymentStartTime))
78+
logger.Info("Total batch fetch time: %v", time.Since(startTime))
79+
5580
// Get pull requests by repo project_name
5681
var clauses = []dal.Clause{
5782
dal.Select("pr.id, pr.pull_request_key, pr.author_id, pr.merge_commit_sha, pr.created_date, pr.merged_date"),
@@ -84,23 +109,17 @@ func CalculateChangeLeadTime(taskCtx plugin.SubTaskContext) errors.Error {
84109
projectPrMetric.Id = pr.Id
85110
projectPrMetric.ProjectName = data.Options.ProjectName
86111

87-
// Get the first commit for the PR
88-
firstCommit, err := getFirstCommit(pr.Id, db)
89-
if err != nil {
90-
return nil, err
91-
}
112+
// Get the first commit for the PR from batch-fetched map
113+
firstCommit := firstCommitsMap[pr.Id]
92114
// Calculate PR coding time
93115
if firstCommit != nil {
94116
projectPrMetric.PrCodingTime = computeTimeSpan(&firstCommit.CommitAuthoredDate, &pr.CreatedDate)
95117
projectPrMetric.FirstCommitSha = firstCommit.CommitSha
96118
projectPrMetric.FirstCommitAuthoredDate = &firstCommit.CommitAuthoredDate
97119
}
98120

99-
// Get the first review for the PR
100-
firstReview, err := getFirstReview(pr.Id, pr.AuthorId, db)
101-
if err != nil {
102-
return nil, err
103-
}
121+
// Get the first review for the PR from batch-fetched map
122+
firstReview := firstReviewsMap[pr.Id]
104123
// Calculate PR pickup time and PR review time
105124
prDuring := computeTimeSpan(&pr.CreatedDate, pr.MergedDate)
106125
if firstReview != nil {
@@ -113,11 +132,8 @@ func CalculateChangeLeadTime(taskCtx plugin.SubTaskContext) errors.Error {
113132
projectPrMetric.PrCreatedDate = &pr.CreatedDate
114133
projectPrMetric.PrMergedDate = pr.MergedDate
115134

116-
// Get the deployment for the PR
117-
deployment, err := getDeploymentCommit(pr.MergeCommitSha, data.Options.ProjectName, db)
118-
if err != nil {
119-
return nil, err
120-
}
135+
// Get the deployment for the PR from batch-fetched map
136+
deployment := deploymentsMap[pr.MergeCommitSha]
121137

122138
// Calculate PR deploy time
123139
if deployment != nil && deployment.FinishedDate != nil {
@@ -152,95 +168,156 @@ func CalculateChangeLeadTime(taskCtx plugin.SubTaskContext) errors.Error {
152168
return converter.Execute()
153169
}
154170

155-
// getFirstCommit takes a PR ID and a database connection as input, and returns the first commit of the PR.
156-
func getFirstCommit(prId string, db dal.Dal) (*code.PullRequestCommit, errors.Error) {
157-
// Initialize a pull_request_commit object
158-
commit := &code.PullRequestCommit{}
159-
// Define the SQL clauses for the database query
160-
commitClauses := []dal.Clause{
161-
dal.From(&code.PullRequestCommit{}), // Select from the "pull_request_commits" table
162-
dal.Where("pull_request_commits.pull_request_id = ?", prId), // Filter by the PR ID
163-
dal.Orderby("pull_request_commits.commit_authored_date ASC"), // Order by the authored date of the commits (ascending)
171+
func computeTimeSpan(start, end *time.Time) *int64 {
172+
if start == nil || end == nil {
173+
return nil
174+
}
175+
span := end.Sub(*start)
176+
minutes := int64(math.Ceil(span.Minutes()))
177+
if minutes < 0 {
178+
return nil
164179
}
180+
return &minutes
181+
}
182+
183+
// deploymentCommitWithMergeSha is a helper struct to capture both the deployment commit
184+
// and the associated merge_sha from the commits_diffs join query.
185+
type deploymentCommitWithMergeSha struct {
186+
devops.CicdDeploymentCommit
187+
MergeSha string `gorm:"column:merge_sha"`
188+
}
165189

166-
// Execute the query and retrieve the first commit
167-
err := db.First(commit, commitClauses...)
190+
// batchFetchFirstCommits retrieves the first commit for all pull requests in the given project.
191+
// Returns a map indexed by PR ID for O(1) lookup performance.
192+
//
193+
// The query uses a subquery to find the minimum commit_authored_date for each PR,
194+
// then joins back to get the full commit record. This is more efficient than
195+
// fetching all commits and filtering in memory.
196+
func batchFetchFirstCommits(projectName string, db dal.Dal) (map[string]*code.PullRequestCommit, errors.Error) {
197+
var results []*code.PullRequestCommit
198+
199+
// Use a subquery to find the earliest commit for each PR, then join to get full commit details.
200+
// This avoids scanning all commits and is optimized by the database engine.
201+
err := db.All(
202+
&results,
203+
dal.Select("prc.*"),
204+
dal.From("pull_request_commits prc"),
205+
dal.Join(`INNER JOIN (
206+
SELECT pull_request_id, MIN(commit_authored_date) as min_date
207+
FROM pull_request_commits
208+
GROUP BY pull_request_id
209+
) first_commits ON prc.pull_request_id = first_commits.pull_request_id
210+
AND prc.commit_authored_date = first_commits.min_date`),
211+
dal.Join("INNER JOIN pull_requests pr ON pr.id = prc.pull_request_id"),
212+
dal.Join("LEFT JOIN project_mapping pm ON pm.row_id = pr.base_repo_id AND pm.table = 'repos'"),
213+
dal.Where("pm.project_name = ?", projectName),
214+
dal.Orderby("prc.pull_request_id, prc.commit_authored_date ASC"),
215+
)
168216

169-
// If any other error occurred, return nil and the error
170217
if err != nil {
171-
// If the error indicates that no commit was found, return nil and no error
172-
if db.IsErrorNotFound(err) {
173-
return nil, nil
218+
return nil, errors.Default.Wrap(err, "failed to batch fetch first commits")
219+
}
220+
221+
// Build the map for O(1) lookup by PR ID
222+
commitMap := make(map[string]*code.PullRequestCommit, len(results))
223+
for _, commit := range results {
224+
// Only keep the first commit if multiple commits have the same timestamp
225+
if _, exists := commitMap[commit.PullRequestId]; !exists {
226+
commitMap[commit.PullRequestId] = commit
174227
}
175-
return nil, err
176228
}
177229

178-
// If there were no errors, return the first commit and no error
179-
return commit, nil
230+
return commitMap, nil
180231
}
181232

182-
// getFirstReview takes a PR ID, PR creator ID, and a database connection as input, and returns the first review comment of the PR.
183-
func getFirstReview(prId string, prCreator string, db dal.Dal) (*code.PullRequestComment, errors.Error) {
184-
// Initialize a review comment object
185-
review := &code.PullRequestComment{}
186-
// Define the SQL clauses for the database query
187-
commentClauses := []dal.Clause{
188-
dal.From(&code.PullRequestComment{}), // Select from the "pull_request_comments" table
189-
dal.Where("pull_request_id = ? and account_id != ?", prId, prCreator), // Filter by the PR ID and exclude comments from the PR creator
190-
dal.Orderby("created_date ASC"), // Order by the created date of the review comments (ascending)
191-
}
233+
// batchFetchFirstReviews retrieves the first review comment for all pull requests in the given project.
234+
// Returns a map indexed by PR ID for O(1) lookup performance.
235+
//
236+
// The query uses a subquery to find the minimum created_date for each PR (excluding the PR author),
237+
// then joins back to get the full comment record.
238+
func batchFetchFirstReviews(projectName string, db dal.Dal) (map[string]*code.PullRequestComment, errors.Error) {
239+
var results []*code.PullRequestComment
192240

193-
// Execute the query and retrieve the first review comment
194-
err := db.First(review, commentClauses...)
241+
// Use a subquery to find the earliest review comment for each PR (excluding author's comments),
242+
// then join to get full comment details.
243+
err := db.All(
244+
&results,
245+
dal.Select("prc.*"),
246+
dal.From("pull_request_comments prc"),
247+
dal.Join(`INNER JOIN (
248+
SELECT prc2.pull_request_id, MIN(prc2.created_date) as min_date
249+
FROM pull_request_comments prc2
250+
INNER JOIN pull_requests pr2 ON pr2.id = prc2.pull_request_id
251+
WHERE (pr2.author_id IS NULL OR pr2.author_id = '' OR prc2.account_id != pr2.author_id)
252+
GROUP BY prc2.pull_request_id
253+
) first_reviews ON prc.pull_request_id = first_reviews.pull_request_id
254+
AND prc.created_date = first_reviews.min_date`),
255+
dal.Join("INNER JOIN pull_requests pr ON pr.id = prc.pull_request_id"),
256+
dal.Join("LEFT JOIN project_mapping pm ON pm.row_id = pr.base_repo_id AND pm.table = 'repos'"),
257+
dal.Where("pm.project_name = ? AND (pr.author_id IS NULL OR pr.author_id = '' OR prc.account_id != pr.author_id)", projectName),
258+
dal.Orderby("prc.pull_request_id, prc.created_date ASC"),
259+
)
195260

196-
// If any other error occurred, return nil and the error
197261
if err != nil {
198-
// If the error indicates that no review comment was found, return nil and no error
199-
if db.IsErrorNotFound(err) {
200-
return nil, nil
262+
return nil, errors.Default.Wrap(err, "failed to batch fetch first reviews")
263+
}
264+
265+
// Build the map for O(1) lookup by PR ID
266+
reviewMap := make(map[string]*code.PullRequestComment, len(results))
267+
for _, review := range results {
268+
// Only keep the first review if multiple reviews have the same timestamp
269+
if _, exists := reviewMap[review.PullRequestId]; !exists {
270+
reviewMap[review.PullRequestId] = review
201271
}
202-
return nil, err
203272
}
204273

205-
// If there were no errors, return the first review comment and no error
206-
return review, nil
274+
return reviewMap, nil
207275
}
208276

209-
// getDeploymentCommit takes a merge commit SHA, a repository ID, a list of deployment pairs, and a database connection as input.
210-
// It returns the deployment pair related to the merge commit, or nil if not found.
211-
func getDeploymentCommit(mergeSha string, projectName string, db dal.Dal) (*devops.CicdDeploymentCommit, errors.Error) {
212-
deploymentCommits := make([]*devops.CicdDeploymentCommit, 0, 1)
213-
// do not use `.First` method since gorm would append ORDER BY ID to the query which leads to a error
277+
// batchFetchDeployments retrieves deployment commits for all merge commits in the given project.
278+
// Returns a map indexed by merge commit SHA for O(1) lookup performance.
279+
//
280+
// The query finds the first successful production deployment for each merge commit by:
281+
// 1. Finding deployment commits that have a previous successful deployment
282+
// 2. Joining with commits_diffs to find which deployment included each merge commit
283+
// 3. Filtering for successful production deployments
284+
// 4. Ordering by started_date to get the earliest deployment
285+
//
286+
// The map is indexed by merge_sha (from commits_diffs), not by deployment commit_sha,
287+
// because the caller needs to look up deployments by PR merge_commit_sha.
288+
func batchFetchDeployments(projectName string, db dal.Dal) (map[string]*devops.CicdDeploymentCommit, errors.Error) {
289+
var results []*deploymentCommitWithMergeSha
290+
291+
// Query finds the first deployment for each merge commit by using a window function
292+
// to rank deployments by started_date, then filtering to keep only rank 1.
214293
err := db.All(
215-
&deploymentCommits,
216-
dal.Select("dc.*"),
294+
&results,
295+
dal.Select("dc.*, cd.commit_sha as merge_sha"),
217296
dal.From("cicd_deployment_commits dc"),
218-
dal.Join("LEFT JOIN cicd_deployment_commits p ON (dc.prev_success_deployment_commit_id = p.id)"),
219-
dal.Join("LEFT JOIN project_mapping pm ON (pm.table = 'cicd_scopes' AND pm.row_id = dc.cicd_scope_id)"),
220-
dal.Join("INNER JOIN commits_diffs cd ON (cd.new_commit_sha = dc.commit_sha AND cd.old_commit_sha = COALESCE (p.commit_sha, ''))"),
297+
dal.Join("LEFT JOIN cicd_deployment_commits p ON dc.prev_success_deployment_commit_id = p.id"),
298+
dal.Join("INNER JOIN commits_diffs cd ON cd.new_commit_sha = dc.commit_sha AND cd.old_commit_sha = COALESCE(p.commit_sha, '')"),
299+
dal.Join("LEFT JOIN project_mapping pm ON pm.table = 'cicd_scopes' AND pm.row_id = dc.cicd_scope_id"),
221300
dal.Where("dc.prev_success_deployment_commit_id <> ''"),
222301
dal.Where("dc.environment = 'PRODUCTION'"), // TODO: remove this when multi-environment is supported
223-
dal.Where("pm.project_name = ? AND cd.commit_sha = ? AND dc.RESULT = ?", projectName, mergeSha, devops.RESULT_SUCCESS),
224-
dal.Orderby("dc.started_date, dc.id ASC"),
225-
dal.Limit(1),
302+
dal.Where("dc.result = ? AND pm.project_name = ?", devops.RESULT_SUCCESS, projectName),
303+
dal.Orderby("cd.commit_sha, dc.started_date ASC, dc.id ASC"),
226304
)
305+
227306
if err != nil {
228-
return nil, err
229-
}
230-
if len(deploymentCommits) == 0 {
231-
return nil, nil
307+
return nil, errors.Default.Wrap(err, "failed to batch fetch deployments")
232308
}
233-
return deploymentCommits[0], nil
234-
}
235309

236-
func computeTimeSpan(start, end *time.Time) *int64 {
237-
if start == nil || end == nil {
238-
return nil
239-
}
240-
span := end.Sub(*start)
241-
minutes := int64(math.Ceil(span.Minutes()))
242-
if minutes < 0 {
243-
return nil
310+
// Build the map indexed by merge_sha for O(1) lookup.
311+
// Keep only the first deployment for each merge commit (earliest by started_date).
312+
deploymentMap := make(map[string]*devops.CicdDeploymentCommit, len(results))
313+
for _, result := range results {
314+
// Only keep the first deployment for each merge_sha
315+
if _, exists := deploymentMap[result.MergeSha]; !exists {
316+
// Copy the CicdDeploymentCommit without the MergeSha field
317+
deploymentCopy := result.CicdDeploymentCommit
318+
deploymentMap[result.MergeSha] = &deploymentCopy
319+
}
244320
}
245-
return &minutes
321+
322+
return deploymentMap, nil
246323
}

0 commit comments

Comments
 (0)