metadata_service.go raw
1 package credentials
2
3 import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "net"
10 "net/http"
11 "net/http/httputil"
12 "os"
13 "sync"
14 "time"
15
16 "github.com/hashicorp/go-retryablehttp"
17 "go.uber.org/zap"
18 )
19
const (
	// metadataIamTokenValidityThreshold is the tolerance used when the real expiration time of the cached token is
	// compared against the current time.
	metadataIamTokenValidityThreshold = 5 * time.Second
	// metadataIamTokenRefreshTimeout bounds a single background token refresh attempt.
	metadataIamTokenRefreshTimeout = 10 * time.Second

	// metadataTokenCappedExpirationDuration is the longest lifetime reported to callers for a token, whatever
	// lifetime the metadata service announced. The cap exists for the following case: the instance service account
	// is stripped of all permissions (or even deleted) and the instance is switched to another service account with
	// valid permissions. The old token has not expired yet, so it would keep being used for subsequent requests and
	// they would fail with Permission Denied. A short reported lifetime is a workaround for that.
	metadataTokenCappedExpirationDuration = 60 * time.Second
	// metadataTokenRefreshInterval is the period of the background refresh ticker: the capped lifetime minus the
	// time a single refresh attempt is allowed to take.
	metadataTokenRefreshInterval = metadataTokenCappedExpirationDuration - metadataIamTokenRefreshTimeout
)
32
33 var _ NonExchangeableCredentials = (*metadataServiceCredentialProvider)(nil)
34 var _ MetadataServiceCredentialProvider = (*metadataServiceCredentialProvider)(nil)
35
36 type MetadataServiceCredentialProvider interface {
37 NonExchangeableCredentials
38
39 Addr() string
40 Available(ctx context.Context) bool
41 }
42
43 type metadataServiceCredentialProvider struct {
44 metadataServiceAddr string
45 client retryablehttp.Client
46
47 currentTokenMutex sync.RWMutex
48 currentToken string
49 currentTokenRealExpiration time.Time
50
51 refreshMutex sync.Mutex
52 lastRefreshTime time.Time
53
54 refreshStop chan struct{}
55 refreshTicker *time.Ticker
56 logger *zap.Logger
57 }
58
59 var providerInstances = make(map[string]MetadataServiceCredentialProvider)
60 var providerInstancesLock sync.Mutex
61
62 // MetadataService returns credentials provider that queries local metadata service for IAM tokens
63 // This is currently available on Yandex Cloud Compute Instances instances with a Service Account attached
64 // https://yandex.cloud/ru/docs/compute/concepts/vm-metadata
65 func MetadataService() MetadataServiceCredentialProvider {
66 return NewMetadataServiceCredentialProvider(GetMetadataServiceAddr())
67 }
68
69 func NewMetadataServiceCredentialProvider(metadataServiceAddr string) MetadataServiceCredentialProvider {
70 providerInstancesLock.Lock()
71 defer providerInstancesLock.Unlock()
72
73 if provider, ok := providerInstances[metadataServiceAddr]; ok {
74 return provider
75 }
76
77 provider := metadataServiceCredentialProvider{
78 metadataServiceAddr: metadataServiceAddr,
79 client: retryablehttp.Client{
80 HTTPClient: &http.Client{
81 Transport: &http.Transport{
82 DialContext: (&net.Dialer{
83 Timeout: time.Second, // One second should be enough for localhost connection.
84 KeepAlive: -1, // No keep alive. Near token per hour requested.
85 }).DialContext,
86 ResponseHeaderTimeout: 5 * time.Second, // Prevent hanging after successful request write.
87 },
88 },
89 RetryMax: 5,
90 CheckRetry: retryablehttp.DefaultRetryPolicy,
91 Backoff: retryablehttp.DefaultBackoff,
92 },
93 refreshStop: make(chan struct{}),
94 refreshTicker: time.NewTicker(metadataTokenRefreshInterval),
95 logger: zap.NewNop(),
96 }
97 go provider.tokenRefreshLoop()
98
99 providerInstances[metadataServiceAddr] = &provider
100
101 return &provider
102 }
103
104 func (c *metadataServiceCredentialProvider) InjectLogger(logger *zap.Logger) {
105 c.logger = logger
106 }
107
108 func (c *metadataServiceCredentialProvider) tokenRefreshLoop() {
109 for {
110 select {
111 case <-c.refreshStop:
112 c.logger.Info("Token refresh loop stopped")
113 c.refreshTicker.Stop()
114 return
115 case <-c.refreshTicker.C:
116 c.logger.Debug("Starting token refresh")
117 c.refreshTokenWithTimeout()
118 }
119 }
120 }
121
122 func (c *metadataServiceCredentialProvider) refreshTokenWithTimeout() {
123 refreshContext, cancel := context.WithTimeout(context.Background(), metadataIamTokenRefreshTimeout)
124 defer cancel()
125
126 if err := c.refreshToken(refreshContext); err != nil {
127 c.logger.Error("Failed to refresh token", zap.Error(err))
128 }
129 }
130
131 func (c *metadataServiceCredentialProvider) refreshToken(ctx context.Context) error {
132 c.refreshMutex.Lock()
133 defer c.refreshMutex.Unlock()
134
135 if c.recentlyRefreshedAndValid() {
136 c.logger.Debug("Token is still valid, skipping refresh")
137 return nil
138 }
139
140 c.logger.Debug("Getting new IAM token")
141 token, actualExpiration, err := c.iamToken(ctx)
142 if err != nil {
143 return fmt.Errorf("failed to get compute instance service account token from instance metadata service: GET %s: %w", c.url(), err)
144 }
145
146 c.currentTokenMutex.Lock()
147 defer c.currentTokenMutex.Unlock()
148
149 c.currentTokenRealExpiration = actualExpiration
150 c.currentToken = token
151 c.lastRefreshTime = time.Now()
152
153 c.logger.Info("Token refreshed successfully",
154 zap.Time("expiresAt", actualExpiration),
155 zap.Time("refreshedAt", c.lastRefreshTime))
156
157 return nil
158 }
159
160 func (c *metadataServiceCredentialProvider) recentlyRefreshedAndValid() bool {
161 if time.Since(c.lastRefreshTime) < metadataTokenCappedExpirationDuration {
162 _, err := c.getValidCachedToken()
163 return err == nil
164 }
165
166 return false
167 }
168
169 func (c *metadataServiceCredentialProvider) iamToken(ctx context.Context) (string, time.Time, error) {
170 req, err := retryablehttp.NewRequest("GET", c.url(), nil)
171 if err != nil {
172 return "", time.Time{}, fmt.Errorf("request make failed: %w", err)
173 }
174 req.Header.Set("Metadata-Flavor", "Google")
175 reqDump, _ := httputil.DumpRequestOut(req.Request, false)
176 c.logger.Debug("Requesting instance SA token", zap.String("request", string(reqDump)))
177
178 resp, err := c.client.Do(req.WithContext(ctx))
179 if err != nil {
180 c.logger.Error("Metadata service call failed", zap.Error(err))
181 return "", time.Time{}, fmt.Errorf("compute instance metadata service call failed.\n" +
182 "Are you inside compute instance?\n" +
183 "Details")
184 }
185
186 defer resp.Body.Close()
187 respDump, _ := httputil.DumpResponse(resp, false)
188 c.logger.Debug("Received metadata service response", zap.String("response", string(respDump)))
189
190 if resp.StatusCode == http.StatusNotFound {
191 c.logger.Error("Service account not found")
192 return "", time.Time{}, fmt.Errorf("%s.\n"+
193 "Is this compute instance running using Service Account? That is, Instance.service_account_id should not be empty.",
194 resp.Status)
195 }
196
197 body, err := io.ReadAll(resp.Body)
198
199 if resp.StatusCode != http.StatusOK {
200 if err != nil {
201 body = []byte(fmt.Sprintf("Failed response body read failed: %s", err.Error()))
202 }
203
204 c.logger.Error("Failed to get SA token",
205 zap.String("status", resp.Status),
206 zap.String("body", string(body)))
207
208 return "", time.Time{}, fmt.Errorf("%s", resp.Status)
209 }
210
211 if err != nil {
212 return "", time.Time{}, fmt.Errorf("response read failed: %s", err)
213 }
214
215 var tokenResponse struct {
216 AccessToken string `json:"access_token"`
217 ExpiresIn int64 `json:"expires_in"`
218 TokenType string `json:"token_type"`
219 }
220
221 err = json.Unmarshal(body, &tokenResponse)
222 if err != nil {
223 c.logger.Error("Failed to unmarshal token response",
224 zap.Error(err))
225 return "", time.Time{}, fmt.Errorf("body unmarshal failed: %w", err)
226 }
227
228 actualExpirationTime := time.Now().Add(time.Duration(tokenResponse.ExpiresIn) * time.Second)
229 c.logger.Debug("Successfully parsed token response",
230 zap.Time("expiresAt", actualExpirationTime),
231 zap.String("tokenType", tokenResponse.TokenType))
232
233 return tokenResponse.AccessToken, actualExpirationTime, nil
234 }
235
236 func (c *metadataServiceCredentialProvider) url() string {
237 return fmt.Sprintf("http://%s/computeMetadata/v1/instance/service-accounts/default/token", c.metadataServiceAddr)
238 }
239
240 func (c *metadataServiceCredentialProvider) Addr() string {
241 return c.metadataServiceAddr
242 }
243
244 func (c *metadataServiceCredentialProvider) Available(ctx context.Context) bool {
245 _, err := c.getValidCachedToken()
246 if err == nil {
247 c.logger.Debug("Cached token is available and valid")
248 return true
249 }
250
251 c.logger.Debug("Checking metadata service availability")
252 dialer := net.Dialer{Timeout: 50 * time.Millisecond}
253
254 conn, err := dialer.Dial("tcp", c.metadataServiceAddr)
255 if err != nil {
256 c.logger.Error("Failed to connect to metadata service", zap.Error(err))
257 return false
258 }
259
260 _ = conn.Close()
261
262 return c.refreshToken(ctx) == nil
263 }
264
265 func (c *metadataServiceCredentialProvider) getValidCachedToken() (string, error) {
266 c.currentTokenMutex.RLock()
267 defer c.currentTokenMutex.RUnlock()
268
269 if c.currentToken != "" && !c.currentTokenRealExpiration.IsZero() {
270 if c.currentTokenRealExpiration.After(time.Now().Add(-metadataIamTokenValidityThreshold)) {
271 return c.currentToken, nil
272 } else {
273 c.logger.Debug("Cached token is expired",
274 zap.Time("expiresAt", c.currentTokenRealExpiration))
275 return "", errors.New("current token is expired")
276 }
277 }
278
279 c.logger.Debug("No cached token available")
280 return "", errors.New("current token is unavailable")
281 }
282
283 func (c *metadataServiceCredentialProvider) IAMToken(ctx context.Context) (*CredentialsToken, error) {
284 _, err := c.getValidCachedToken()
285 if err == nil {
286 c.logger.Debug("Using cached token")
287 return c.toTokenResponse(), nil
288 }
289
290 c.logger.Debug("Cached token invalid or expired, refreshing token")
291 // It is expected that the token is updated at the background, but we do this as the last resort measure here
292 // (basically, the legacy behavior reproduction)
293 err = c.refreshToken(ctx)
294 if err != nil {
295 c.logger.Error("Failed to refresh token", zap.Error(err))
296 return nil, err
297 }
298
299 return c.toTokenResponse(), nil
300 }
301
302 func (c *metadataServiceCredentialProvider) toTokenResponse() *CredentialsToken {
303 c.currentTokenMutex.RLock()
304 defer c.currentTokenMutex.RUnlock()
305
306 expiresAt := getCappedExpiresAt(c.currentTokenRealExpiration)
307 c.logger.Debug("Creating token response", zap.Time("expiresAt", expiresAt))
308
309 return &CredentialsToken{
310 Token: c.currentToken,
311 ExpiresAt: expiresAt,
312 }
313 }
314
315 // getCappedExpiresAt limits the token expiration duration by the metadataTokenCappedExpirationDuration to handle the
316 // following use-case: instance sa is stripped of all permissions (or even worse, deleted), instance is updated to use
317 // another sa with valid permissions; however, since the token is not expired it is still used for subsequent requests
318 // resulting in Permission Denied. Shorter expiration time is a workaround
319 func getCappedExpiresAt(actualExpirationTime time.Time) time.Time {
320 result := time.Now()
321 if actualExpirationTime.Sub(result) > metadataTokenCappedExpirationDuration {
322 result = result.Add(metadataTokenCappedExpirationDuration)
323 }
324
325 return result
326 }
327
328 func (c *metadataServiceCredentialProvider) YandexCloudAPICredentials() {}
329
330 // GetMetadataServiceAddr returns the address of Metadata Service, gets the value from InstanceMetadataOverrideEnvVar
331 // env variable if it is set, otherwise uses the default address from InstanceMetadataAddr.
332 func GetMetadataServiceAddr() string {
333 if nonDefaultAddr := os.Getenv(InstanceMetadataOverrideEnvVar); nonDefaultAddr != "" {
334 return nonDefaultAddr
335 }
336
337 return InstanceMetadataAddr
338 }
339