incubator-devlake/backend/helpers/pluginhelper/api/graphql_async_client.go at c8927e0ab9d00d2bb5dd788047945702745e39ab · apache/incubator-devlake · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements.  See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
	"context"
	"fmt"
	"strconv"
	"sync"
	"time"

	"github.com/apache/incubator-devlake/core/errors"
	"github.com/apache/incubator-devlake/core/log"
	"github.com/apache/incubator-devlake/core/plugin"
	"github.com/apache/incubator-devlake/core/utils"

	"github.com/merico-ai/graphql"
)

// GraphqlAsyncClient wraps a graphql.Client and sends GraphQL requests one at
// a time. It retries failed requests and makes Query block while the tracked
// rate-limit quota is exhausted.
type GraphqlAsyncClient struct {
	// ctx is derived from the task context; cancel ends it and is invoked by Release.
	ctx       context.Context
	cancel    context.CancelFunc
	// client is the underlying GraphQL client used to execute queries.
	client    *graphql.Client
	logger    log.Logger
	// mu is held for the whole of Query, so only one request is in flight at a time.
	mu        sync.Mutex
	// waitGroup counts running Query calls and tasks scheduled through NextTick;
	// Wait blocks on it.
	waitGroup sync.WaitGroup

	// maxRetry bounds the number of attempts Query makes for one request.
	maxRetry         int
	// waitBeforeRetry is the pause between a failed attempt and the next one.
	waitBeforeRetry  time.Duration
	// rateExhaustCond is waited on by Query while rateRemaining <= 0 and is
	// signalled by updateRateRemaining when a positive quota is stored.
	rateExhaustCond  *sync.Cond
	// rateRemaining is the locally tracked quota; Query subtracts the query cost from it.
	rateRemaining    int
	// getRateRemaining, when non-nil, fetches the current quota and its reset time.
	getRateRemaining func(context.Context, *graphql.Client, log.Logger) (rateRemaining int, resetAt *time.Time, err errors.Error)
	// getRateCost, when non-nil, returns the cost of a query; otherwise each query costs 1.
	getRateCost      func(q interface{}) int
}

// defaultRateLimitConst is the generic fallback rate limit for GraphQL requests.
// It is used as the initial remaining quota when dynamic rate limit
// information is unavailable from the provider. CreateAsyncGraphqlClient lets
// an option such as WithFallbackRateLimit or the GRAPHQL_RATE_LIMIT
// configuration value replace it.
const defaultRateLimitConst = 1000

// CreateAsyncGraphqlClient builds a GraphqlAsyncClient on top of graphqlClient.
//
// The fallback quota is resolved in this order: defaultRateLimitConst, then
// any opts (for example WithFallbackRateLimit), then the GRAPHQL_RATE_LIMIT
// configuration value when it is explicitly set. If getRateRemaining is
// non-nil it is called once: on success its result replaces the fallback, on
// failure the fallback is kept.
//
// Retry behaviour is read from configuration: API_RETRY is the maximum number
// of attempts per request (default 3) and API_TIMEOUT is the pause between
// attempts (default 30s). A BadInput error is returned when either value
// cannot be parsed.
func CreateAsyncGraphqlClient(
	taskCtx plugin.TaskContext,
	graphqlClient *graphql.Client,
	logger log.Logger,
	getRateRemaining func(context.Context, *graphql.Client, log.Logger) (rateRemaining int, resetAt *time.Time, err errors.Error),
	opts ...func(*GraphqlAsyncClient),
) (*GraphqlAsyncClient, errors.Error) {
	// Validate the retry/timeout configuration before creating the cancellable
	// context or starting the rate-refresh goroutine, so that a configuration
	// error cannot leave either of them behind without an owner.
	retry, err := utils.StrToIntOr(taskCtx.GetConfig("API_RETRY"), 3)
	if err != nil {
		return nil, errors.BadInput.Wrap(err, "failed to parse API_RETRY")
	}
	timeout := 30 * time.Second
	if timeoutConf := taskCtx.GetConfig("API_TIMEOUT"); timeoutConf != "" {
		// override timeout value if API_TIMEOUT is provided
		timeout, err = errors.Convert01(time.ParseDuration(timeoutConf))
		if err != nil {
			return nil, errors.BadInput.Wrap(err, "failed to parse API_TIMEOUT")
		}
	}

	ctxWithCancel, cancel := context.WithCancel(taskCtx.GetContext())

	graphqlAsyncClient := &GraphqlAsyncClient{
		ctx:              ctxWithCancel,
		cancel:           cancel,
		client:           graphqlClient,
		logger:           logger,
		rateExhaustCond:  sync.NewCond(&sync.Mutex{}),
		rateRemaining:    defaultRateLimitConst,
		getRateRemaining: getRateRemaining,
	}
	graphqlAsyncClient.SetMaxRetry(retry, timeout)

	// apply options
	for _, opt := range opts {
		opt(graphqlAsyncClient)
	}

	// An explicitly configured GRAPHQL_RATE_LIMIT replaces the default and any
	// option-provided fallback. A successful getRateRemaining call below still
	// takes precedence over it.
	if rateLimit := resolveRateLimit(taskCtx, logger); rateLimit != -1 {
		logger.Info("GRAPHQL_RATE_LIMIT env override applied: %d (was %d)", rateLimit, graphqlAsyncClient.rateRemaining)
		graphqlAsyncClient.rateRemaining = rateLimit
	}

	// Start from the fallback and replace it with the provider's answer when
	// one is available.
	initialRemaining := graphqlAsyncClient.rateRemaining
	var initialResetAt *time.Time
	if getRateRemaining != nil {
		fetched, fetchedResetAt, rateErr := getRateRemaining(taskCtx.GetContext(), graphqlClient, logger)
		if rateErr != nil {
			graphqlAsyncClient.logger.Info("failed to fetch initial graphql rate limit, fallback to default: %v", rateErr)
		} else {
			initialRemaining, initialResetAt = fetched, fetchedResetAt
		}
	}
	graphqlAsyncClient.updateRateRemaining(initialRemaining, initialResetAt)

	return graphqlAsyncClient, nil
}

// GetMaxRetry reports the retry settings currently in effect: the maximum
// number of attempts per request and the pause between attempts.
func (apiClient *GraphqlAsyncClient) GetMaxRetry() (int, time.Duration) {
	attempts, pause := apiClient.maxRetry, apiClient.waitBeforeRetry
	return attempts, pause
}

// SetMaxRetry configures how many attempts Query makes for a request
// (maxRetry) and how long it pauses after a failed attempt before trying
// again (waitBeforeRetry).
func (apiClient *GraphqlAsyncClient) SetMaxRetry(maxRetry int, waitBeforeRetry time.Duration) {
	apiClient.maxRetry, apiClient.waitBeforeRetry = maxRetry, waitBeforeRetry
}

// updateRateRemaining stores rateRemaining as the current quota, wakes a Query
// that is blocked on an exhausted quota when the new value is positive, and
// schedules the next refresh.
//
// The counter is written while holding rateExhaustCond.L, the same lock Query
// holds while it reads and decrements the counter. Callers must therefore not
// hold that lock when calling this method. If a Query is in flight, the write
// is applied once that Query releases the lock.
//
// resetAt, when non-nil and in the future, is the time at which the next
// refresh is attempted; otherwise the refresh happens after 3 minutes.
func (apiClient *GraphqlAsyncClient) updateRateRemaining(rateRemaining int, resetAt *time.Time) {
	apiClient.rateExhaustCond.L.Lock()
	apiClient.rateRemaining = rateRemaining
	if rateRemaining > 0 {
		apiClient.rateExhaustCond.Signal()
	}
	apiClient.rateExhaustCond.L.Unlock()

	apiClient.scheduleRateRefresh(resetAt)
}

// scheduleRateRefresh starts a goroutine that waits until resetAt (or for 3
// minutes when resetAt is nil or not in the future), then calls
// getRateRemaining and applies the result through updateRateRemaining. It does
// nothing when no getRateRemaining callback is configured, and the goroutine
// exits without refreshing once the client context is done.
func (apiClient *GraphqlAsyncClient) scheduleRateRefresh(resetAt *time.Time) {
	if apiClient.getRateRemaining == nil {
		return
	}

	nextDuring := 3 * time.Minute
	if resetAt != nil {
		if untilReset := time.Until(*resetAt); untilReset > 0 {
			nextDuring = untilReset
		}
	}

	go func() {
		// A stoppable timer is released as soon as the context is cancelled
		// instead of lingering until it would have fired.
		timer := time.NewTimer(nextDuring)
		defer timer.Stop()

		select {
		case <-apiClient.ctx.Done():
			// finish go routine when context done
			return
		case <-timer.C:
		}

		newRateRemaining, newResetAt, err := apiClient.getRateRemaining(apiClient.ctx, apiClient.client, apiClient.logger)
		if err != nil {
			apiClient.logger.Info("failed to update graphql rate limit, will retry next cycle: %v", err)
			// Keep the current counter untouched and simply try again after
			// the default interval.
			apiClient.scheduleRateRefresh(nil)
			return
		}
		apiClient.updateRateRemaining(newRateRemaining, newResetAt)
	}()
}

// SetGetRateCost installs the callback Query uses to determine how much quota
// a successful query consumes. When no callback is installed every query is
// charged a cost of 1.
func (apiClient *GraphqlAsyncClient) SetGetRateCost(getRateCost func(q interface{}) int) {
	costOf := getRateCost
	apiClient.getRateCost = costOf
}

// Query sends a GraphQL request once it holds the client lock, so requests
// issued through this client run one at a time.
//
// Before sending, it blocks while the tracked quota is exhausted
// (rateRemaining <= 0). A failed request is retried until maxRetry attempts
// have been made, pausing waitBeforeRetry between attempts; the pause ends
// early when the client context is cancelled.
//
// Returns:
//   - []graphql.DataError: the errors returned in the response body, if any.
//     The quota is not charged in that case.
//   - error: context.Canceled when the request itself reports cancellation, or
//     a wrapped error carrying the last failure once all attempts are used up.
//
// (nil, nil) is returned on success, and also when the client context is
// already done before an attempt starts.
func (apiClient *GraphqlAsyncClient) Query(q interface{}, variables map[string]interface{}) ([]graphql.DataError, error) {
	apiClient.waitGroup.Add(1)
	defer apiClient.waitGroup.Done()
	apiClient.mu.Lock()
	defer apiClient.mu.Unlock()

	apiClient.rateExhaustCond.L.Lock()
	defer apiClient.rateExhaustCond.L.Unlock()
	for apiClient.rateRemaining <= 0 {
		apiClient.logger.Info(`rate limit remaining exhausted, waiting for next period.`)
		apiClient.rateExhaustCond.Wait()
	}

	retryTime := 0
	// err keeps the most recent failure so it can be reported once every
	// attempt has been used. It must be assigned with "=", not redeclared with
	// ":=", inside the loop; otherwise the final error would lose its cause.
	var err error
	for retryTime < apiClient.maxRetry {
		select {
		case <-apiClient.ctx.Done():
			return nil, nil
		default:
		}

		var dataErrors []graphql.DataError
		dataErrors, err = apiClient.client.Query(apiClient.ctx, q, variables)
		if err == context.Canceled {
			return nil, err
		}
		if err != nil {
			apiClient.logger.Warn(err, "retry #%d graphql calling after %ds", retryTime, apiClient.waitBeforeRetry/time.Second)
			retryTime++
			// Pause before the next attempt, but stop waiting as soon as the
			// client is released; the check at the top of the loop then ends
			// the call.
			select {
			case <-apiClient.ctx.Done():
			case <-time.After(apiClient.waitBeforeRetry):
			}
			continue
		}
		if dataErrors != nil {
			return dataErrors, nil
		}

		cost := 1
		if apiClient.getRateCost != nil {
			cost = apiClient.getRateCost(q)
		}
		apiClient.rateRemaining -= cost
		apiClient.logger.Debug(`query cost %d in %v`, cost, variables)
		return nil, nil
	}
	return nil, errors.Default.Wrap(err, fmt.Sprintf("got error when querying GraphQL (from the %dth retry)", retryTime))
}

// NextTick runs task asynchronously in its own goroutine and passes the
// task's result to taskErrorChecker.
//
// The task is registered with the client's wait group before this method
// returns, so a later Wait call blocks until it has finished. If the client
// context is already done when the goroutine starts, the task is skipped; the
// wait group is still released in that case so Wait cannot hang.
func (apiClient *GraphqlAsyncClient) NextTick(task func() errors.Error, taskErrorChecker func(err error)) {
	// to make sure task will be enqueued
	apiClient.waitGroup.Add(1)
	go func() {
		// Balance the Add above on every path, including the cancelled one.
		defer apiClient.waitGroup.Done()

		select {
		case <-apiClient.ctx.Done():
			return
		default:
		}
		taskErrorChecker(task())
	}()
}

// Wait blocks until every running Query call and every task scheduled through
// NextTick has finished.
func (apiClient *GraphqlAsyncClient) Wait() {
	pending := &apiClient.waitGroup
	pending.Wait()
}

// Release cancels the context owned by this GraphqlAsyncClient. The background
// rate-limit refresh stops, and Query and NextTick stop starting new work once
// they observe the cancellation.
func (apiClient *GraphqlAsyncClient) Release() {
	stop := apiClient.cancel
	stop()
}

// WithFallbackRateLimit returns an option that sets the initial/fallback
// remaining quota, used when rate limit information cannot be fetched
// dynamically. Non-positive limits are ignored and leave the client unchanged.
// The value may be overridden later by getRateRemaining.
func WithFallbackRateLimit(limit int) func(*GraphqlAsyncClient) {
	if limit <= 0 {
		// Nothing to apply: hand back an option that does not touch the client.
		return func(*GraphqlAsyncClient) {}
	}
	return func(target *GraphqlAsyncClient) {
		target.rateRemaining = limit
	}
}

// resolveRateLimit reads the GRAPHQL_RATE_LIMIT configuration value.
//
// It returns the configured limit when the value is a positive integer, and -1
// when the value is not set or invalid. A value that is not an integer, or
// that is zero or negative, counts as invalid and is reported with a warning.
// Non-positive limits are rejected for the same reason WithFallbackRateLimit
// ignores them: Query blocks while the remaining quota is not positive.
func resolveRateLimit(taskCtx plugin.TaskContext, logger log.Logger) int {
	v := taskCtx.GetConfig("GRAPHQL_RATE_LIMIT")
	if v == "" {
		return -1
	}

	parsed, err := strconv.Atoi(v)
	if err != nil {
		// Pass the parse error along so the log shows why the value was rejected.
		logger.Warn(err, "invalid GRAPHQL_RATE_LIMIT, using default")
		return -1
	}
	if parsed <= 0 {
		logger.Warn(nil, "invalid GRAPHQL_RATE_LIMIT, using default")
		return -1
	}
	return parsed
}