Skip to content

Commit 4973be6

Browse files
authored
fix(github_graphql): remove stale local issue rows when an issue no longer resolves (#8820)
* fix(github_graphql): remove stale local issue rows * fix(github_graphql): prevent stale issue cleanup from crossing repository boundaries The previous missing-issue cleanup only keyed deletes by connection and issue identifiers, which could reach too broadly when an issue was transferred between repositories under the same DevLake connection. This change scopes cleanup to the source repository by carrying RepoId through the refresh path and requiring the source RawDataOrigin parameters before deleting stale tool rows. Constraint: Keep the fix inside the existing github_graphql collector path without schema changes Rejected: Continue deleting by connection_id + github_id only | could remove destination-repository rows after transfers Rejected: Disable stale cleanup for all missing issues | would leave deleted-issue failures recurring Confidence: medium Scope-risk: moderate Directive: Preserve source-scope guards if future cleanup paths touch transferred issues again Tested: go test ./plugins/github_graphql/tasks; go build ./helpers/pluginhelper/api ./plugins/github_graphql/tasks Not-tested: End-to-end transfer scenario against a live GitHub repository
1 parent c2b530a commit 4973be6

4 files changed

Lines changed: 375 additions & 6 deletions

File tree

backend/helpers/pluginhelper/api/graphql_collector.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,14 +276,18 @@ func (collector *GraphqlCollector) fetchAsync(reqData *GraphqlRequestData, handl
276276
}
277277
if len(dataErrors) > 0 {
278278
if !collector.args.IgnoreQueryErrors {
279+
hasNonIgnorableDataErrors := false
279280
for _, dataError := range dataErrors {
280-
if strings.Contains(dataError.Error(), "Could not resolve to an Issue") {
281-
logger.Warn(nil, "Issue may have been transferred.")
282-
} else {
283-
collector.checkError(errors.Default.Wrap(dataError, `graphql query got error`))
281+
if isIgnorableGraphqlQueryError(dataError) {
282+
logger.Warn(nil, "Issue may have been transferred or deleted.")
283+
continue
284284
}
285+
hasNonIgnorableDataErrors = true
286+
collector.checkError(errors.Default.Wrap(dataError, `graphql query got error`))
287+
}
288+
if hasNonIgnorableDataErrors {
289+
return
285290
}
286-
return
287291
}
288292
// else: error will deal by ResponseParserWithDataErrors
289293
}
@@ -344,4 +348,8 @@ func (collector *GraphqlCollector) HasError() bool {
344348
return len(collector.workerErrors) > 0
345349
}
346350

351+
func isIgnorableGraphqlQueryError(err error) bool {
352+
return err != nil && strings.Contains(err.Error(), "Could not resolve to an Issue")
353+
}
354+
347355
var _ plugin.SubTask = (*GraphqlCollector)(nil)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package api
19+
20+
import (
21+
"errors"
22+
"testing"
23+
24+
"github.com/stretchr/testify/assert"
25+
)
26+
27+
func TestIsIgnorableGraphqlQueryError(t *testing.T) {
28+
assert.True(t, isIgnorableGraphqlQueryError(errors.New("Could not resolve to an Issue with the number of 17.")))
29+
assert.False(t, isIgnorableGraphqlQueryError(errors.New("some other graphql error")))
30+
assert.False(t, isIgnorableGraphqlQueryError(nil))
31+
}

backend/plugins/github_graphql/tasks/issue_collector.go

Lines changed: 192 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@ package tasks
2020
import (
2121
"encoding/json"
2222
"reflect"
23+
"sort"
2324
"strings"
2425
"time"
2526

2627
"github.com/apache/incubator-devlake/core/dal"
2728
"github.com/apache/incubator-devlake/core/errors"
29+
"github.com/apache/incubator-devlake/core/log"
30+
"github.com/apache/incubator-devlake/core/models/common"
2831
"github.com/apache/incubator-devlake/core/plugin"
2932
"github.com/apache/incubator-devlake/core/utils"
3033
"github.com/apache/incubator-devlake/helpers/pluginhelper/api"
@@ -49,7 +52,8 @@ type GraphqlQueryIssueWrapper struct {
4952
}
5053

5154
type GraphqlQueryIssueDetailWrapper struct {
52-
RateLimit struct {
55+
requestedIssues map[int]missingGithubIssueRef
56+
RateLimit struct {
5357
Cost int
5458
}
5559
Repository struct {
@@ -84,6 +88,22 @@ type GraphqlQueryIssue struct {
8488
} `graphql:"labels(first: 100)"`
8589
}
8690

91+
type missingGithubIssueRef struct {
92+
ConnectionId uint64
93+
RepoId int
94+
GithubId int
95+
Number int
96+
RawDataOrigin common.RawDataOrigin
97+
}
98+
99+
type missingGithubIssueCleanupScope struct {
100+
ConnectionId uint64
101+
RepoId int
102+
GithubId int
103+
Number int
104+
RawDataOrigin common.RawDataOrigin
105+
}
106+
87107
var CollectIssuesMeta = plugin.SubTaskMeta{
88108
Name: "Collect Issues",
89109
EntryPoint: CollectIssues,
@@ -175,12 +195,20 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
175195
ownerName := strings.Split(data.Options.Name, "/")
176196
inputIssues := reqData.Input.([]interface{})
177197
outputIssues := []map[string]interface{}{}
198+
query.requestedIssues = make(map[int]missingGithubIssueRef, len(inputIssues))
178199
for _, i := range inputIssues {
179200
inputIssue := i.(*models.GithubIssue)
180201
outputIssues = append(outputIssues, map[string]interface{}{
181202
`number`: graphql.Int(inputIssue.Number),
182203
})
183204
issueUpdatedAt[inputIssue.Number] = inputIssue.GithubUpdatedAt
205+
query.requestedIssues[inputIssue.Number] = missingGithubIssueRef{
206+
ConnectionId: inputIssue.ConnectionId,
207+
RepoId: inputIssue.RepoId,
208+
GithubId: inputIssue.GithubId,
209+
Number: inputIssue.Number,
210+
RawDataOrigin: inputIssue.RawDataOrigin,
211+
}
184212
}
185213
variables := map[string]interface{}{
186214
"issue": outputIssues,
@@ -193,10 +221,17 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
193221
query := queryWrapper.(*GraphqlQueryIssueDetailWrapper)
194222
issues := query.Repository.Issues
195223
for _, rawL := range issues {
224+
if rawL.DatabaseId == 0 || rawL.Number == 0 {
225+
continue
226+
}
196227
if rawL.UpdatedAt.After(issueUpdatedAt[rawL.Number]) {
197228
messages = append(messages, errors.Must1(json.Marshal(rawL)))
198229
}
199230
}
231+
missingIssues := findMissingGithubIssues(query.requestedIssues, issues)
232+
if len(missingIssues) > 0 {
233+
err = cleanupMissingGithubIssues(db, taskCtx.GetLogger(), missingIssues)
234+
}
200235
return
201236
},
202237
})
@@ -206,3 +241,159 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
206241

207242
return apiCollector.Execute()
208243
}
244+
245+
func findMissingGithubIssues(requestedIssues map[int]missingGithubIssueRef, resolvedIssues []GraphqlQueryIssue) []missingGithubIssueRef {
246+
if len(requestedIssues) == 0 {
247+
return nil
248+
}
249+
250+
resolvedNumbers := make(map[int]struct{}, len(resolvedIssues))
251+
for _, issue := range resolvedIssues {
252+
if issue.DatabaseId == 0 || issue.Number == 0 {
253+
continue
254+
}
255+
resolvedNumbers[issue.Number] = struct{}{}
256+
}
257+
258+
missingIssues := make([]missingGithubIssueRef, 0)
259+
for number, issue := range requestedIssues {
260+
if _, ok := resolvedNumbers[number]; ok {
261+
continue
262+
}
263+
missingIssues = append(missingIssues, issue)
264+
}
265+
sort.Slice(missingIssues, func(i, j int) bool {
266+
return missingIssues[i].Number < missingIssues[j].Number
267+
})
268+
return missingIssues
269+
}
270+
271+
func cleanupMissingGithubIssues(db dal.Dal, logger log.Logger, issues []missingGithubIssueRef) errors.Error {
272+
var allErrors []error
273+
for _, issue := range issues {
274+
scope, ok := buildMissingGithubIssueCleanupScope(issue)
275+
if !ok {
276+
logger.Warn(nil, "GitHub issue #%d no longer resolves from the source API, but source scope is incomplete so stale cleanup is skipped", issue.Number)
277+
continue
278+
}
279+
logger.Warn(nil, "GitHub issue #%d no longer resolves from the source API, deleting stale local data for the current repository scope", issue.Number)
280+
err := cleanupMissingGithubIssue(db, scope)
281+
if err != nil {
282+
allErrors = append(allErrors, err)
283+
}
284+
}
285+
return errors.Default.Combine(allErrors)
286+
}
287+
288+
func buildMissingGithubIssueCleanupScope(issue missingGithubIssueRef) (*missingGithubIssueCleanupScope, bool) {
289+
if issue.ConnectionId == 0 || issue.RepoId == 0 || issue.GithubId == 0 || issue.RawDataOrigin.RawDataTable == "" || issue.RawDataOrigin.RawDataParams == "" {
290+
return nil, false
291+
}
292+
return &missingGithubIssueCleanupScope{
293+
ConnectionId: issue.ConnectionId,
294+
RepoId: issue.RepoId,
295+
GithubId: issue.GithubId,
296+
Number: issue.Number,
297+
RawDataOrigin: issue.RawDataOrigin,
298+
}, true
299+
}
300+
301+
func (scope *missingGithubIssueCleanupScope) issueScopedClauses() []dal.Clause {
302+
return []dal.Clause{
303+
dal.Where(
304+
"connection_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?",
305+
scope.ConnectionId,
306+
scope.GithubId,
307+
scope.RawDataOrigin.RawDataTable,
308+
scope.RawDataOrigin.RawDataParams,
309+
),
310+
}
311+
}
312+
313+
func (scope *missingGithubIssueCleanupScope) assigneeScopedClauses() []dal.Clause {
314+
return []dal.Clause{
315+
dal.Where(
316+
"connection_id = ? AND repo_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?",
317+
scope.ConnectionId,
318+
scope.RepoId,
319+
scope.GithubId,
320+
scope.RawDataOrigin.RawDataTable,
321+
scope.RawDataOrigin.RawDataParams,
322+
),
323+
}
324+
}
325+
326+
func (scope *missingGithubIssueCleanupScope) githubIssueScopedClauses() []dal.Clause {
327+
return []dal.Clause{
328+
dal.Where(
329+
"connection_id = ? AND repo_id = ? AND github_id = ? AND _raw_data_table = ? AND _raw_data_params = ?",
330+
scope.ConnectionId,
331+
scope.RepoId,
332+
scope.GithubId,
333+
scope.RawDataOrigin.RawDataTable,
334+
scope.RawDataOrigin.RawDataParams,
335+
),
336+
}
337+
}
338+
339+
func (scope *missingGithubIssueCleanupScope) rawDataScopedClauses() []dal.Clause {
340+
if scope.RawDataOrigin.RawDataId == 0 {
341+
return nil
342+
}
343+
return []dal.Clause{dal.Where("id = ?", scope.RawDataOrigin.RawDataId)}
344+
}
345+
346+
func cleanupMissingGithubIssue(db dal.Dal, scope *missingGithubIssueCleanupScope) errors.Error {
347+
deleteByIssueId := func(model any, table string) errors.Error {
348+
clauses := append([]dal.Clause{dal.From(table)}, scope.issueScopedClauses()...)
349+
err := db.Delete(model, clauses...)
350+
if err != nil {
351+
return errors.Default.Wrap(err, "failed to delete stale github issue data from "+table)
352+
}
353+
return nil
354+
}
355+
356+
err := deleteByIssueId(&models.GithubIssueComment{}, models.GithubIssueComment{}.TableName())
357+
if err != nil {
358+
return err
359+
}
360+
err = deleteByIssueId(&models.GithubIssueEvent{}, models.GithubIssueEvent{}.TableName())
361+
if err != nil {
362+
return err
363+
}
364+
err = deleteByIssueId(&models.GithubIssueLabel{}, models.GithubIssueLabel{}.TableName())
365+
if err != nil {
366+
return err
367+
}
368+
err = db.Delete(
369+
&models.GithubIssueAssignee{},
370+
append([]dal.Clause{dal.From(models.GithubIssueAssignee{}.TableName())}, scope.assigneeScopedClauses()...)...,
371+
)
372+
if err != nil {
373+
return errors.Default.Wrap(err, "failed to delete stale github issue assignees")
374+
}
375+
err = db.Delete(
376+
&models.GithubPrIssue{},
377+
append([]dal.Clause{dal.From(models.GithubPrIssue{}.TableName())}, scope.issueScopedClauses()...)...,
378+
)
379+
if err != nil {
380+
return errors.Default.Wrap(err, "failed to delete stale github pull request issue links")
381+
}
382+
err = db.Delete(
383+
&models.GithubIssue{},
384+
append([]dal.Clause{dal.From(models.GithubIssue{}.TableName())}, scope.githubIssueScopedClauses()...)...,
385+
)
386+
if err != nil {
387+
return errors.Default.Wrap(err, "failed to delete stale github issue")
388+
}
389+
if rawDataClauses := scope.rawDataScopedClauses(); len(rawDataClauses) > 0 {
390+
err = db.Delete(
391+
&api.RawData{},
392+
append([]dal.Clause{dal.From(scope.RawDataOrigin.RawDataTable)}, rawDataClauses...)...,
393+
)
394+
if err != nil {
395+
return errors.Default.Wrap(err, "failed to delete stale raw github issue")
396+
}
397+
}
398+
return nil
399+
}

0 commit comments

Comments
 (0)