Skip to content

Commit b9f9dd5

Browse files
committed
(upstream) chore: Add backfill time window to ensure data consistency
chore: Add backfill time window to ensure data consistency (cherry picked from commit 52a9555b782860f9c8ba9db409bd56e0c8f58272)
1 parent 3a1cf9c commit b9f9dd5

5 files changed

Lines changed: 244 additions & 94 deletions

File tree

backend/plugins/gh-copilot/tasks/enterprise_metrics_collector.go

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ package tasks
2020
import (
2121
"encoding/json"
2222
"fmt"
23-
"io"
2423
"net/http"
2524
"net/url"
2625
"time"
@@ -76,6 +75,7 @@ func CollectEnterpriseMetrics(taskCtx plugin.SubTaskContext) errors.Error {
7675

7776
now := time.Now().UTC()
7877
start, until := computeReportDateRange(now, collector.GetSince())
78+
start = clampDailyMetricsStartForBackfill(start, until)
7979
logger := taskCtx.GetLogger()
8080

8181
dayIter := newDayIterator(start, until)
@@ -95,42 +95,7 @@ func CollectEnterpriseMetrics(taskCtx plugin.SubTaskContext) errors.Error {
9595
Concurrency: 1,
9696
AfterResponse: ignore404,
9797
ResponseParser: func(res *http.Response) ([]json.RawMessage, errors.Error) {
98-
// Parse metadata response to get download links
99-
body, readErr := io.ReadAll(res.Body)
100-
res.Body.Close()
101-
if readErr != nil {
102-
return nil, errors.Default.Wrap(readErr, "failed to read report metadata")
103-
}
104-
105-
var meta reportMetadataResponse
106-
if jsonErr := json.Unmarshal(body, &meta); jsonErr != nil {
107-
snippet := string(body)
108-
if len(snippet) > 200 {
109-
snippet = snippet[:200]
110-
}
111-
logger.Error(jsonErr, "failed to parse report metadata, body=%s", snippet)
112-
return nil, errors.Default.Wrap(jsonErr, "failed to parse report metadata")
113-
}
114-
115-
if len(meta.DownloadLinks) == 0 {
116-
logger.Info("No download links for report day=%s, skipping", meta.ReportDay)
117-
return nil, nil
118-
}
119-
120-
// Download each report file and return contents as raw messages
121-
var results []json.RawMessage
122-
for _, link := range meta.DownloadLinks {
123-
reportBody, dlErr := downloadReport(link, logger)
124-
if dlErr != nil {
125-
logger.Error(nil, "failed to download report for day=%s: %s", meta.ReportDay, dlErr.Error())
126-
return nil, dlErr
127-
}
128-
if reportBody == nil {
129-
continue // blob not found, skip
130-
}
131-
results = append(results, json.RawMessage(reportBody))
132-
}
133-
return results, nil
98+
return parseRawReportResponse(res, logger)
13499
},
135100
})
136101
if err != nil {

backend/plugins/gh-copilot/tasks/metrics_collector_test.go

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ limitations under the License.
1818
package tasks
1919

2020
import (
21+
"bytes"
22+
"io"
2123
"net/http"
2224
"testing"
2325
"time"
@@ -45,6 +47,7 @@ func TestComputeReportDateRangeDefaultLookback(t *testing.T) {
4547
}
4648

4749
func TestComputeReportDateRangeUsesSince(t *testing.T) {
50+
// since is far enough in the past that the lookback buffer doesn't apply.
4851
now := time.Date(2025, 1, 10, 12, 0, 0, 0, time.UTC)
4952
since := time.Date(2025, 1, 3, 12, 0, 0, 0, time.UTC)
5053
start, until := computeReportDateRange(now, &since)
@@ -61,9 +64,80 @@ func TestComputeReportDateRangeClampsToLookback(t *testing.T) {
6164
}
6265

6366
func TestComputeReportDateRangeClampsFutureSince(t *testing.T) {
67+
// Future since is clamped to until, then the lookback buffer applies.
6468
now := time.Date(2025, 1, 10, 12, 0, 0, 0, time.UTC)
6569
since := now.Add(24 * time.Hour)
6670
start, until := computeReportDateRange(now, &since)
6771
require.Equal(t, time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC), until)
68-
require.Equal(t, time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC), start)
72+
require.Equal(t, time.Date(2025, 1, 7, 0, 0, 0, 0, time.UTC), start)
73+
}
74+
75+
func TestComputeReportDateRangeLookbackBuffer(t *testing.T) {
76+
// since is yesterday: without the buffer we'd only request 1 day (yesterday).
77+
// With the buffer we look back reportLookbackDays days to retry any 404'd days.
78+
now := time.Date(2025, 1, 10, 0, 0, 0, 0, time.UTC) // midnight run
79+
since := time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC) // LatestSuccessStart from previous midnight run
80+
start, until := computeReportDateRange(now, &since)
81+
require.Equal(t, time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC), until)
82+
require.Equal(t, time.Date(2025, 1, 7, 0, 0, 0, 0, time.UTC), start)
83+
}
84+
85+
func TestClampDailyMetricsStartForBackfillRecentStart(t *testing.T) {
86+
until := time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC)
87+
start := time.Date(2025, 1, 7, 0, 0, 0, 0, time.UTC)
88+
89+
clamped := clampDailyMetricsStartForBackfill(start, until)
90+
require.Equal(t, time.Date(2025, 1, 6, 0, 0, 0, 0, time.UTC), clamped)
91+
}
92+
93+
func TestClampDailyMetricsStartForBackfillKeepsOlderStart(t *testing.T) {
94+
until := time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC)
95+
start := time.Date(2025, 1, 3, 0, 0, 0, 0, time.UTC)
96+
97+
clamped := clampDailyMetricsStartForBackfill(start, until)
98+
require.Equal(t, start, clamped)
99+
}
100+
101+
func TestUserMetricsDateRangeAppliesFourDayBackfillWindow(t *testing.T) {
102+
now := time.Date(2025, 1, 10, 0, 0, 0, 0, time.UTC)
103+
since := time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC)
104+
105+
start, until := computeReportDateRange(now, &since)
106+
start = clampDailyMetricsStartForBackfill(start, until)
107+
108+
require.Equal(t, time.Date(2025, 1, 9, 0, 0, 0, 0, time.UTC), until)
109+
require.Equal(t, time.Date(2025, 1, 6, 0, 0, 0, 0, time.UTC), start)
110+
}
111+
112+
func TestParseReportMetadataResponseNoContent(t *testing.T) {
113+
res := &http.Response{
114+
StatusCode: http.StatusNoContent,
115+
Body: io.NopCloser(bytes.NewReader(nil)),
116+
}
117+
118+
meta, err := parseReportMetadataResponse(res, nil)
119+
require.NoError(t, err)
120+
require.Nil(t, meta)
121+
}
122+
123+
func TestParseReportMetadataResponseEmptyBody(t *testing.T) {
124+
res := &http.Response{
125+
StatusCode: http.StatusOK,
126+
Body: io.NopCloser(bytes.NewReader(nil)),
127+
}
128+
129+
meta, err := parseReportMetadataResponse(res, nil)
130+
require.NoError(t, err)
131+
require.Nil(t, meta)
132+
}
133+
134+
func TestParseReportMetadataResponseEmptyString(t *testing.T) {
135+
res := &http.Response{
136+
StatusCode: http.StatusOK,
137+
Body: io.NopCloser(bytes.NewReader([]byte(`""`))),
138+
}
139+
140+
meta, err := parseReportMetadataResponse(res, nil)
141+
require.NoError(t, err)
142+
require.Nil(t, meta)
69143
}

backend/plugins/gh-copilot/tasks/org_metrics_collector.go

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ package tasks
2020
import (
2121
"encoding/json"
2222
"fmt"
23-
"io"
2423
"net/http"
2524
"net/url"
2625
"time"
@@ -70,6 +69,7 @@ func CollectOrgMetrics(taskCtx plugin.SubTaskContext) errors.Error {
7069

7170
now := time.Now().UTC()
7271
start, until := computeReportDateRange(now, collector.GetSince())
72+
start = clampDailyMetricsStartForBackfill(start, until)
7373
logger := taskCtx.GetLogger()
7474

7575
dayIter := newDayIterator(start, until)
@@ -89,29 +89,7 @@ func CollectOrgMetrics(taskCtx plugin.SubTaskContext) errors.Error {
8989
Concurrency: 1,
9090
AfterResponse: ignore404,
9191
ResponseParser: func(res *http.Response) ([]json.RawMessage, errors.Error) {
92-
body, readErr := io.ReadAll(res.Body)
93-
res.Body.Close()
94-
if readErr != nil {
95-
return nil, errors.Default.Wrap(readErr, "failed to read report metadata")
96-
}
97-
98-
var meta reportMetadataResponse
99-
if jsonErr := json.Unmarshal(body, &meta); jsonErr != nil {
100-
return nil, errors.Default.Wrap(jsonErr, "failed to parse report metadata")
101-
}
102-
103-
var results []json.RawMessage
104-
for _, link := range meta.DownloadLinks {
105-
reportBody, dlErr := downloadReport(link, logger)
106-
if dlErr != nil {
107-
return nil, dlErr
108-
}
109-
if reportBody == nil {
110-
continue // blob not found, skip
111-
}
112-
results = append(results, json.RawMessage(reportBody))
113-
}
114-
return results, nil
92+
return parseRawReportResponse(res, logger)
11593
},
11694
})
11795
if err != nil {

backend/plugins/gh-copilot/tasks/report_download_helper.go

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ import (
3333
// reportMaxDays is the maximum historical window the new report API supports (1 year).
3434
const reportMaxDays = 365
3535

36+
const (
	// reportLookbackDays is the number of extra days rewound from 'until' on
	// incremental runs. GitHub generates reports some hours after midnight, so
	// a midnight run sees a 404 for the previous day; without this buffer,
	// 'LatestSuccessStart' would advance past that missed day permanently.
	reportLookbackDays = 2

	// dailyMetricsTrailingBackfillDays extends retries for delayed daily
	// report generation.
	dailyMetricsTrailingBackfillDays = 4
)
43+
3644
// copilotRawParams identifies a set of raw data records for a given connection/scope.
3745
type copilotRawParams struct {
3846
ConnectionId uint64
@@ -60,6 +68,14 @@ func ignore404(res *http.Response) errors.Error {
6068
return nil
6169
}
6270

71+
func clampDailyMetricsStartForBackfill(start, until time.Time) time.Time {
72+
trailingStart := until.AddDate(0, 0, -(dailyMetricsTrailingBackfillDays - 1))
73+
if start.After(trailingStart) {
74+
return trailingStart
75+
}
76+
return start
77+
}
78+
6379
// reportMetadataResponse represents the JSON returned by the report metadata endpoints.
6480
type reportMetadataResponse struct {
6581
DownloadLinks []string `json:"download_links"`
@@ -69,7 +85,120 @@ type reportMetadataResponse struct {
6985
ReportEndDay string `json:"report_end_day"`
7086
}
7187

88+
func readReportMetadataBody(res *http.Response) ([]byte, errors.Error) {
89+
body, readErr := io.ReadAll(res.Body)
90+
res.Body.Close()
91+
if readErr != nil {
92+
return nil, errors.Default.Wrap(readErr, "failed to read report metadata")
93+
}
94+
return body, nil
95+
}
96+
97+
func logReportMetadataParseError(body []byte, err error, logger log.Logger) {
98+
if logger == nil {
99+
return
100+
}
101+
snippet := string(body)
102+
if len(snippet) > 200 {
103+
snippet = snippet[:200]
104+
}
105+
logger.Error(err, "failed to parse report metadata, body=%s", snippet)
106+
}
107+
108+
func reportMetadataRange(meta reportMetadataResponse) string {
109+
if meta.ReportDay != "" {
110+
return meta.ReportDay
111+
}
112+
if meta.ReportStartDay != "" && meta.ReportEndDay != "" {
113+
return fmt.Sprintf("%s..%s", meta.ReportStartDay, meta.ReportEndDay)
114+
}
115+
return ""
116+
}
117+
118+
func logMissingDownloadLinks(meta reportMetadataResponse, logger log.Logger) {
119+
if logger == nil || len(meta.DownloadLinks) != 0 {
120+
return
121+
}
122+
reportRange := reportMetadataRange(meta)
123+
if reportRange != "" {
124+
logger.Info("No download links for report day=%s, skipping", reportRange)
125+
return
126+
}
127+
logger.Info("No download links in report metadata, skipping")
128+
}
129+
130+
func parseReportMetadata(body []byte, logger log.Logger) (*reportMetadataResponse, errors.Error) {
131+
trimmed := bytes.TrimSpace(body)
132+
if len(trimmed) == 0 {
133+
if logger != nil {
134+
logger.Info("Report metadata response was empty, skipping")
135+
}
136+
return nil, nil
137+
}
138+
139+
// Handle JSON-encoded empty string ""
140+
if bytes.Equal(trimmed, []byte(`""`)) {
141+
if logger != nil {
142+
logger.Info("Report metadata response was empty string, skipping")
143+
}
144+
return nil, nil
145+
}
146+
147+
var meta reportMetadataResponse
148+
if jsonErr := json.Unmarshal(trimmed, &meta); jsonErr != nil {
149+
logReportMetadataParseError(trimmed, jsonErr, logger)
150+
return nil, errors.Default.Wrap(jsonErr, "failed to parse report metadata")
151+
}
152+
153+
logMissingDownloadLinks(meta, logger)
154+
155+
return &meta, nil
156+
}
157+
158+
func parseReportMetadataResponse(res *http.Response, logger log.Logger) (*reportMetadataResponse, errors.Error) {
159+
if res.StatusCode == http.StatusNoContent {
160+
if logger != nil {
161+
logger.Info("Report metadata not ready yet (204), skipping for now")
162+
}
163+
res.Body.Close()
164+
return nil, nil
165+
}
166+
167+
body, readErr := readReportMetadataBody(res)
168+
if readErr != nil {
169+
return nil, readErr
170+
}
171+
172+
return parseReportMetadata(body, logger)
173+
}
174+
175+
func collectRawReportRecords(downloadLinks []string, logger log.Logger) ([]json.RawMessage, errors.Error) {
176+
var results []json.RawMessage
177+
for _, link := range downloadLinks {
178+
reportBody, dlErr := downloadReport(link, logger)
179+
if dlErr != nil {
180+
return nil, dlErr
181+
}
182+
if reportBody == nil {
183+
continue
184+
}
185+
results = append(results, json.RawMessage(reportBody))
186+
}
187+
return results, nil
188+
}
189+
190+
func parseRawReportResponse(res *http.Response, logger log.Logger) ([]json.RawMessage, errors.Error) {
191+
meta, err := parseReportMetadataResponse(res, logger)
192+
if err != nil || meta == nil {
193+
return nil, err
194+
}
195+
196+
return collectRawReportRecords(meta.DownloadLinks, logger)
197+
}
198+
72199
// computeReportDateRange returns the range of dates to collect, clamped to the API max.
200+
// When 'since' is set, 'start' is rewound to at least 'until - reportLookbackDays'
201+
// so days that returned 404 (report not yet generated) are retried on subsequent runs.
73202
func computeReportDateRange(now time.Time, since *time.Time) (start, until time.Time) {
74203
until = utcDate(now).AddDate(0, 0, -1) // reports are available for the previous day
75204
min := until.AddDate(0, 0, -(reportMaxDays - 1))
@@ -82,6 +211,10 @@ func computeReportDateRange(now time.Time, since *time.Time) (start, until time.
82211
if start.After(until) {
83212
start = until
84213
}
214+
// Rewind 'start' by 'reportLookbackDays' so recently-missed days are retried.
215+
if lookback := until.AddDate(0, 0, -reportLookbackDays); start.After(lookback) {
216+
start = lookback
217+
}
85218
}
86219
return start, until
87220
}

0 commit comments

Comments
 (0)