Skip to content

Commit 19b853a

Browse files
authored
fix(q_dev): prevent data duplication in user_report and user_data tables (#8737)
* fix(q_dev): prevent data duplication in user_report and user_data tables Replace auto-increment ID with composite primary keys so that CreateOrUpdate can properly deduplicate rows on re-extraction. - user_report PK: (connection_id, scope_id, user_id, date, client_type) - user_data PK: (connection_id, scope_id, user_id, date) - Switch db.Create() to db.CreateOrUpdate() in s3_data_extractor - Migration drops old tables, rebuilds with new PKs, resets s3_file_meta processed flag to trigger re-extraction * fix(q_dev): gofmt archived user_data_v2 model
1 parent 3a1cf9c commit 19b853a

8 files changed

Lines changed: 252 additions & 13 deletions

File tree

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package migrationscripts
19+
20+
import (
21+
"github.com/apache/incubator-devlake/core/context"
22+
"github.com/apache/incubator-devlake/core/errors"
23+
"github.com/apache/incubator-devlake/helpers/migrationhelper"
24+
"github.com/apache/incubator-devlake/plugins/q_dev/models/migrationscripts/archived"
25+
)
26+
27+
type fixDedupUserTables struct{}
28+
29+
func (*fixDedupUserTables) Up(basicRes context.BasicRes) errors.Error {
30+
db := basicRes.GetDal()
31+
32+
// Drop old tables that used auto-increment ID (which caused data duplication)
33+
err := db.DropTables(
34+
"_tool_q_dev_user_report",
35+
"_tool_q_dev_user_data",
36+
)
37+
if err != nil {
38+
return errors.Default.Wrap(err, "failed to drop old user tables")
39+
}
40+
41+
// Recreate tables with composite primary keys for proper deduplication
42+
err = migrationhelper.AutoMigrateTables(
43+
basicRes,
44+
&archived.QDevUserReportV2{},
45+
&archived.QDevUserDataV2{},
46+
)
47+
if err != nil {
48+
return errors.Default.Wrap(err, "failed to recreate user tables")
49+
}
50+
51+
return nil
52+
}
53+
54+
func (*fixDedupUserTables) Version() uint64 {
55+
return 20260228000001
56+
}
57+
58+
func (*fixDedupUserTables) Name() string {
59+
return "Rebuild user_report and user_data tables with composite primary keys to fix data duplication"
60+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package migrationscripts
19+
20+
import (
21+
"github.com/apache/incubator-devlake/core/context"
22+
"github.com/apache/incubator-devlake/core/dal"
23+
"github.com/apache/incubator-devlake/core/errors"
24+
)
25+
26+
type resetS3FileMetaProcessed struct{}
27+
28+
func (*resetS3FileMetaProcessed) Up(basicRes context.BasicRes) errors.Error {
29+
db := basicRes.GetDal()
30+
31+
// Reset processed flag so data will be re-extracted with the new
32+
// dedup-safe composite-PK schema on next pipeline run
33+
err := db.UpdateColumn(
34+
"_tool_q_dev_s3_file_meta",
35+
"processed", false,
36+
dal.Where("1 = 1"),
37+
)
38+
if err != nil {
39+
return errors.Default.Wrap(err, "failed to reset s3_file_meta processed flag")
40+
}
41+
42+
return nil
43+
}
44+
45+
func (*resetS3FileMetaProcessed) Version() uint64 {
46+
return 20260228000002
47+
}
48+
49+
func (*resetS3FileMetaProcessed) Name() string {
50+
return "Reset s3_file_meta processed flag to re-extract data with dedup-safe schema"
51+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package archived
19+
20+
import (
21+
"time"
22+
23+
"github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
24+
)
25+
26+
type QDevUserDataV2 struct {
27+
archived.NoPKModel
28+
ConnectionId uint64 `gorm:"primaryKey"`
29+
ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
30+
UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
31+
Date time.Time `gorm:"primaryKey;type:date" json:"date"`
32+
DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
33+
CodeReview_FindingsCount int
34+
CodeReview_SucceededEventCount int
35+
InlineChat_AcceptanceEventCount int
36+
InlineChat_AcceptedLineAdditions int
37+
InlineChat_AcceptedLineDeletions int
38+
InlineChat_DismissalEventCount int
39+
InlineChat_DismissedLineAdditions int
40+
InlineChat_DismissedLineDeletions int
41+
InlineChat_RejectedLineAdditions int
42+
InlineChat_RejectedLineDeletions int
43+
InlineChat_RejectionEventCount int
44+
InlineChat_TotalEventCount int
45+
Inline_AICodeLines int
46+
Inline_AcceptanceCount int
47+
Inline_SuggestionsCount int
48+
Chat_AICodeLines int
49+
Chat_MessagesInteracted int
50+
Chat_MessagesSent int
51+
CodeFix_AcceptanceEventCount int
52+
CodeFix_AcceptedLines int
53+
CodeFix_GeneratedLines int
54+
CodeFix_GenerationEventCount int
55+
CodeReview_FailedEventCount int
56+
Dev_AcceptanceEventCount int
57+
Dev_AcceptedLines int
58+
Dev_GeneratedLines int
59+
Dev_GenerationEventCount int
60+
DocGeneration_AcceptedFileUpdates int
61+
DocGeneration_AcceptedFilesCreations int
62+
DocGeneration_AcceptedLineAdditions int
63+
DocGeneration_AcceptedLineUpdates int
64+
DocGeneration_EventCount int
65+
DocGeneration_RejectedFileCreations int
66+
DocGeneration_RejectedFileUpdates int
67+
DocGeneration_RejectedLineAdditions int
68+
DocGeneration_RejectedLineUpdates int
69+
TestGeneration_AcceptedLines int
70+
TestGeneration_AcceptedTests int
71+
TestGeneration_EventCount int
72+
TestGeneration_GeneratedLines int
73+
TestGeneration_GeneratedTests int
74+
Transformation_EventCount int
75+
Transformation_LinesGenerated int
76+
Transformation_LinesIngested int
77+
}
78+
79+
func (QDevUserDataV2) TableName() string {
80+
return "_tool_q_dev_user_data"
81+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package archived
19+
20+
import (
21+
"time"
22+
23+
"github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
24+
)
25+
26+
type QDevUserReportV2 struct {
27+
archived.NoPKModel
28+
ConnectionId uint64 `gorm:"primaryKey"`
29+
ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
30+
UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
31+
Date time.Time `gorm:"primaryKey;type:date" json:"date"`
32+
ClientType string `gorm:"primaryKey;type:varchar(50)" json:"clientType"`
33+
DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
34+
SubscriptionTier string `gorm:"type:varchar(50)" json:"subscriptionTier"`
35+
ProfileId string `gorm:"type:varchar(512)" json:"profileId"`
36+
ChatConversations int `json:"chatConversations"`
37+
CreditsUsed float64 `json:"creditsUsed"`
38+
OverageCap float64 `json:"overageCap"`
39+
OverageCreditsUsed float64 `json:"overageCreditsUsed"`
40+
OverageEnabled bool `json:"overageEnabled"`
41+
TotalMessages int `json:"totalMessages"`
42+
}
43+
44+
func (QDevUserReportV2) TableName() string {
45+
return "_tool_q_dev_user_report"
46+
}

backend/plugins/q_dev/models/migrationscripts/register.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,7 @@ func All() []plugin.MigrationScript {
3333
new(addScopeIdFields),
3434
new(addUserReportTable),
3535
new(addAccountIdToS3Slice),
36+
new(fixDedupUserTables),
37+
new(resetS3FileMetaProcessed),
3638
}
3739
}

backend/plugins/q_dev/models/user_data.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ import (
2525

2626
// QDevUserData 存储从CSV中提取的原始数据
2727
type QDevUserData struct {
28-
common.Model
28+
common.NoPKModel
2929
ConnectionId uint64 `gorm:"primaryKey"`
30-
UserId string `gorm:"index" json:"userId"`
31-
Date time.Time `gorm:"index" json:"date"`
32-
DisplayName string `gorm:"type:varchar(255)" json:"displayName"` // New field for user display name
33-
ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"`
30+
ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
31+
UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
32+
Date time.Time `gorm:"primaryKey;type:date" json:"date"`
33+
DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
3434

3535
CodeReview_FindingsCount int
3636
CodeReview_SucceededEventCount int

backend/plugins/q_dev/models/user_report.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ import (
2424
)
2525

2626
type QDevUserReport struct {
27-
common.Model
27+
common.NoPKModel
2828
ConnectionId uint64 `gorm:"primaryKey"`
29-
UserId string `gorm:"index" json:"userId"`
30-
Date time.Time `gorm:"index" json:"date"`
29+
ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
30+
UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
31+
Date time.Time `gorm:"primaryKey;type:date" json:"date"`
32+
ClientType string `gorm:"primaryKey;type:varchar(50)" json:"clientType"`
3133
DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
32-
ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"`
33-
ClientType string `gorm:"type:varchar(50)" json:"clientType"`
3434
SubscriptionTier string `gorm:"type:varchar(50)" json:"subscriptionTier"`
3535
ProfileId string `gorm:"type:varchar(512)" json:"profileId"`
3636
ChatConversations int `json:"chatConversations"`

backend/plugins/q_dev/tasks/s3_data_extractor.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo
147147
if err != nil {
148148
return errors.Default.Wrap(err, "failed to create user report data")
149149
}
150-
err = db.Create(reportData)
150+
err = db.CreateOrUpdate(reportData)
151151
if err != nil {
152152
return errors.Default.Wrap(err, "failed to save user report data")
153153
}
@@ -158,8 +158,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo
158158
return errors.Default.Wrap(err, "failed to create user data")
159159
}
160160

161-
// Save to database - no need to check for duplicates since we're processing each file only once
162-
err = db.Create(userData)
161+
err = db.CreateOrUpdate(userData)
163162
if err != nil {
164163
return errors.Default.Wrap(err, "failed to save user data")
165164
}

0 commit comments

Comments
 (0)