@@ -20,11 +20,14 @@ package tasks
2020import (
2121 "encoding/json"
2222 "reflect"
23+ "sort"
2324 "strings"
2425 "time"
2526
2627 "github.com/apache/incubator-devlake/core/dal"
2728 "github.com/apache/incubator-devlake/core/errors"
29+ "github.com/apache/incubator-devlake/core/log"
30+ "github.com/apache/incubator-devlake/core/models/common"
2831 "github.com/apache/incubator-devlake/core/plugin"
2932 "github.com/apache/incubator-devlake/core/utils"
3033 "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
@@ -49,7 +52,8 @@ type GraphqlQueryIssueWrapper struct {
4952}
5053
5154type GraphqlQueryIssueDetailWrapper struct {
52- RateLimit struct {
55+ requestedIssues map [int ]missingGithubIssueRef
56+ RateLimit struct {
5357 Cost int
5458 }
5559 Repository struct {
@@ -84,6 +88,22 @@ type GraphqlQueryIssue struct {
8488 } `graphql:"labels(first: 100)"`
8589}
8690
91+ type missingGithubIssueRef struct {
92+ ConnectionId uint64
93+ RepoId int
94+ GithubId int
95+ Number int
96+ RawDataOrigin common.RawDataOrigin
97+ }
98+
99+ type missingGithubIssueCleanupScope struct {
100+ ConnectionId uint64
101+ RepoId int
102+ GithubId int
103+ Number int
104+ RawDataOrigin common.RawDataOrigin
105+ }
106+
87107var CollectIssuesMeta = plugin.SubTaskMeta {
88108 Name : "Collect Issues" ,
89109 EntryPoint : CollectIssues ,
@@ -175,12 +195,20 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
175195 ownerName := strings .Split (data .Options .Name , "/" )
176196 inputIssues := reqData .Input .([]interface {})
177197 outputIssues := []map [string ]interface {}{}
198+ query .requestedIssues = make (map [int ]missingGithubIssueRef , len (inputIssues ))
178199 for _ , i := range inputIssues {
179200 inputIssue := i .(* models.GithubIssue )
180201 outputIssues = append (outputIssues , map [string ]interface {}{
181202 `number` : graphql .Int (inputIssue .Number ),
182203 })
183204 issueUpdatedAt [inputIssue .Number ] = inputIssue .GithubUpdatedAt
205+ query .requestedIssues [inputIssue .Number ] = missingGithubIssueRef {
206+ ConnectionId : inputIssue .ConnectionId ,
207+ RepoId : inputIssue .RepoId ,
208+ GithubId : inputIssue .GithubId ,
209+ Number : inputIssue .Number ,
210+ RawDataOrigin : inputIssue .RawDataOrigin ,
211+ }
184212 }
185213 variables := map [string ]interface {}{
186214 "issue" : outputIssues ,
@@ -193,10 +221,17 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
193221 query := queryWrapper .(* GraphqlQueryIssueDetailWrapper )
194222 issues := query .Repository .Issues
195223 for _ , rawL := range issues {
224+ if rawL .DatabaseId == 0 || rawL .Number == 0 {
225+ continue
226+ }
196227 if rawL .UpdatedAt .After (issueUpdatedAt [rawL .Number ]) {
197228 messages = append (messages , errors .Must1 (json .Marshal (rawL )))
198229 }
199230 }
231+ missingIssues := findMissingGithubIssues (query .requestedIssues , issues )
232+ if len (missingIssues ) > 0 {
233+ err = cleanupMissingGithubIssues (db , taskCtx .GetLogger (), missingIssues )
234+ }
200235 return
201236 },
202237 })
@@ -206,3 +241,159 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
206241
207242 return apiCollector .Execute ()
208243}
244+
245+ func findMissingGithubIssues (requestedIssues map [int ]missingGithubIssueRef , resolvedIssues []GraphqlQueryIssue ) []missingGithubIssueRef {
246+ if len (requestedIssues ) == 0 {
247+ return nil
248+ }
249+
250+ resolvedNumbers := make (map [int ]struct {}, len (resolvedIssues ))
251+ for _ , issue := range resolvedIssues {
252+ if issue .DatabaseId == 0 || issue .Number == 0 {
253+ continue
254+ }
255+ resolvedNumbers [issue .Number ] = struct {}{}
256+ }
257+
258+ missingIssues := make ([]missingGithubIssueRef , 0 )
259+ for number , issue := range requestedIssues {
260+ if _ , ok := resolvedNumbers [number ]; ok {
261+ continue
262+ }
263+ missingIssues = append (missingIssues , issue )
264+ }
265+ sort .Slice (missingIssues , func (i , j int ) bool {
266+ return missingIssues [i ].Number < missingIssues [j ].Number
267+ })
268+ return missingIssues
269+ }
270+
271+ func cleanupMissingGithubIssues (db dal.Dal , logger log.Logger , issues []missingGithubIssueRef ) errors.Error {
272+ var allErrors []error
273+ for _ , issue := range issues {
274+ scope , ok := buildMissingGithubIssueCleanupScope (issue )
275+ if ! ok {
276+ logger .Warn (nil , "GitHub issue #%d no longer resolves from the source API, but source scope is incomplete so stale cleanup is skipped" , issue .Number )
277+ continue
278+ }
279+ logger .Warn (nil , "GitHub issue #%d no longer resolves from the source API, deleting stale local data for the current repository scope" , issue .Number )
280+ err := cleanupMissingGithubIssue (db , scope )
281+ if err != nil {
282+ allErrors = append (allErrors , err )
283+ }
284+ }
285+ return errors .Default .Combine (allErrors )
286+ }
287+
288+ func buildMissingGithubIssueCleanupScope (issue missingGithubIssueRef ) (* missingGithubIssueCleanupScope , bool ) {
289+ if issue .ConnectionId == 0 || issue .RepoId == 0 || issue .GithubId == 0 || issue .RawDataOrigin .RawDataTable == "" || issue .RawDataOrigin .RawDataParams == "" {
290+ return nil , false
291+ }
292+ return & missingGithubIssueCleanupScope {
293+ ConnectionId : issue .ConnectionId ,
294+ RepoId : issue .RepoId ,
295+ GithubId : issue .GithubId ,
296+ Number : issue .Number ,
297+ RawDataOrigin : issue .RawDataOrigin ,
298+ }, true
299+ }
300+
301+ func (scope * missingGithubIssueCleanupScope ) issueScopedClauses () []dal.Clause {
302+ return []dal.Clause {
303+ dal .Where (
304+ "connection_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?" ,
305+ scope .ConnectionId ,
306+ scope .GithubId ,
307+ scope .RawDataOrigin .RawDataTable ,
308+ scope .RawDataOrigin .RawDataParams ,
309+ ),
310+ }
311+ }
312+
313+ func (scope * missingGithubIssueCleanupScope ) assigneeScopedClauses () []dal.Clause {
314+ return []dal.Clause {
315+ dal .Where (
316+ "connection_id = ? AND repo_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?" ,
317+ scope .ConnectionId ,
318+ scope .RepoId ,
319+ scope .GithubId ,
320+ scope .RawDataOrigin .RawDataTable ,
321+ scope .RawDataOrigin .RawDataParams ,
322+ ),
323+ }
324+ }
325+
326+ func (scope * missingGithubIssueCleanupScope ) githubIssueScopedClauses () []dal.Clause {
327+ return []dal.Clause {
328+ dal .Where (
329+ "connection_id = ? AND repo_id = ? AND github_id = ? AND _raw_data_table = ? AND _raw_data_params = ?" ,
330+ scope .ConnectionId ,
331+ scope .RepoId ,
332+ scope .GithubId ,
333+ scope .RawDataOrigin .RawDataTable ,
334+ scope .RawDataOrigin .RawDataParams ,
335+ ),
336+ }
337+ }
338+
339+ func (scope * missingGithubIssueCleanupScope ) rawDataScopedClauses () []dal.Clause {
340+ if scope .RawDataOrigin .RawDataId == 0 {
341+ return nil
342+ }
343+ return []dal.Clause {dal .Where ("id = ?" , scope .RawDataOrigin .RawDataId )}
344+ }
345+
346+ func cleanupMissingGithubIssue (db dal.Dal , scope * missingGithubIssueCleanupScope ) errors.Error {
347+ deleteByIssueId := func (model any , table string ) errors.Error {
348+ clauses := append ([]dal.Clause {dal .From (table )}, scope .issueScopedClauses ()... )
349+ err := db .Delete (model , clauses ... )
350+ if err != nil {
351+ return errors .Default .Wrap (err , "failed to delete stale github issue data from " + table )
352+ }
353+ return nil
354+ }
355+
356+ err := deleteByIssueId (& models.GithubIssueComment {}, models.GithubIssueComment {}.TableName ())
357+ if err != nil {
358+ return err
359+ }
360+ err = deleteByIssueId (& models.GithubIssueEvent {}, models.GithubIssueEvent {}.TableName ())
361+ if err != nil {
362+ return err
363+ }
364+ err = deleteByIssueId (& models.GithubIssueLabel {}, models.GithubIssueLabel {}.TableName ())
365+ if err != nil {
366+ return err
367+ }
368+ err = db .Delete (
369+ & models.GithubIssueAssignee {},
370+ append ([]dal.Clause {dal .From (models.GithubIssueAssignee {}.TableName ())}, scope .assigneeScopedClauses ()... )... ,
371+ )
372+ if err != nil {
373+ return errors .Default .Wrap (err , "failed to delete stale github issue assignees" )
374+ }
375+ err = db .Delete (
376+ & models.GithubPrIssue {},
377+ append ([]dal.Clause {dal .From (models.GithubPrIssue {}.TableName ())}, scope .issueScopedClauses ()... )... ,
378+ )
379+ if err != nil {
380+ return errors .Default .Wrap (err , "failed to delete stale github pull request issue links" )
381+ }
382+ err = db .Delete (
383+ & models.GithubIssue {},
384+ append ([]dal.Clause {dal .From (models.GithubIssue {}.TableName ())}, scope .githubIssueScopedClauses ()... )... ,
385+ )
386+ if err != nil {
387+ return errors .Default .Wrap (err , "failed to delete stale github issue" )
388+ }
389+ if rawDataClauses := scope .rawDataScopedClauses (); len (rawDataClauses ) > 0 {
390+ err = db .Delete (
391+ & api.RawData {},
392+ append ([]dal.Clause {dal .From (scope .RawDataOrigin .RawDataTable )}, rawDataClauses ... )... ,
393+ )
394+ if err != nil {
395+ return errors .Default .Wrap (err , "failed to delete stale raw github issue" )
396+ }
397+ }
398+ return nil
399+ }
0 commit comments