metadata_service.go raw

   1  package credentials
   2  
   3  import (
   4  	"context"
   5  	"encoding/json"
   6  	"errors"
   7  	"fmt"
   8  	"io"
   9  	"net"
  10  	"net/http"
  11  	"net/http/httputil"
  12  	"os"
  13  	"sync"
  14  	"time"
  15  
  16  	"github.com/hashicorp/go-retryablehttp"
  17  	"go.uber.org/zap"
  18  )
  19  
const (
	// metadataIamTokenValidityThreshold is the tolerance getValidCachedToken applies around the real
	// expiration time when deciding whether the cached token can still be used.
	metadataIamTokenValidityThreshold = time.Second * 5
	// metadataIamTokenRefreshTimeout bounds a single background refresh attempt (see refreshTokenWithTimeout).
	metadataIamTokenRefreshTimeout    = time.Second * 10

	// metadataTokenCappedExpirationDuration is the maximum lifetime reported to callers for a token, regardless of
	// the lifetime returned by the metadata service. The expiration is capped at 60 seconds to handle cases when the
	// token becomes invalid before it expires: e.g. the instance SA is stripped of all permissions (or, even worse,
	// deleted) and the instance is updated to use another SA with valid permissions; since the old token is not
	// expired yet, it would still be used for subsequent requests, resulting in Permission Denied. A shorter
	// expiration time is a workaround.
	metadataTokenCappedExpirationDuration = time.Second * 60
	// metadataTokenRefreshInterval is the period of the background refresh ticker: the capped expiration minus the
	// refresh timeout.
	metadataTokenRefreshInterval          = metadataTokenCappedExpirationDuration - metadataIamTokenRefreshTimeout
)
  32  
// Compile-time checks that *metadataServiceCredentialProvider satisfies the interfaces it is returned as.
var _ NonExchangeableCredentials = (*metadataServiceCredentialProvider)(nil)
var _ MetadataServiceCredentialProvider = (*metadataServiceCredentialProvider)(nil)
  35  
// MetadataServiceCredentialProvider is a NonExchangeableCredentials implementation backed by an instance metadata
// service.
type MetadataServiceCredentialProvider interface {
	NonExchangeableCredentials

	// Addr returns the address of the metadata service the provider queries.
	Addr() string
	// Available reports whether an IAM token can currently be served by the provider.
	Available(ctx context.Context) bool
}
  42  
// metadataServiceCredentialProvider fetches IAM tokens from the metadata service, caches the latest one and
// refreshes it periodically from a background goroutine (see tokenRefreshLoop).
type metadataServiceCredentialProvider struct {
	// metadataServiceAddr is the address of the metadata service; it is used to build the token URL and to probe
	// the service over TCP.
	metadataServiceAddr string
	// client is the retrying HTTP client used to call the token endpoint.
	client              retryablehttp.Client

	// currentTokenMutex guards currentToken and currentTokenRealExpiration.
	currentTokenMutex          sync.RWMutex
	currentToken               string
	// currentTokenRealExpiration is the expiration computed from the metadata service response, before capping.
	currentTokenRealExpiration time.Time

	// refreshMutex serializes refreshToken calls so that only one refresh runs at a time.
	refreshMutex    sync.Mutex
	// lastRefreshTime is the time of the last successful refresh; it is read and written inside refreshToken.
	lastRefreshTime time.Time

	// refreshStop makes tokenRefreshLoop exit once a receive on it succeeds.
	refreshStop   chan struct{}
	// refreshTicker triggers the periodic background refresh.
	refreshTicker *time.Ticker
	// logger defaults to a no-op logger and can be replaced with InjectLogger.
	logger        *zap.Logger
}
  58  
// providerInstances holds the providers already created by NewMetadataServiceCredentialProvider, keyed by metadata
// service address, so that each address gets a single provider.
var providerInstances = make(map[string]MetadataServiceCredentialProvider)

// providerInstancesLock guards providerInstances.
var providerInstancesLock sync.Mutex
  61  
  62  // MetadataService returns credentials provider that queries local metadata service for IAM tokens
  63  // This is currently available on Yandex Cloud Compute Instances instances with a Service Account attached
  64  // https://yandex.cloud/ru/docs/compute/concepts/vm-metadata
  65  func MetadataService() MetadataServiceCredentialProvider {
  66  	return NewMetadataServiceCredentialProvider(GetMetadataServiceAddr())
  67  }
  68  
  69  func NewMetadataServiceCredentialProvider(metadataServiceAddr string) MetadataServiceCredentialProvider {
  70  	providerInstancesLock.Lock()
  71  	defer providerInstancesLock.Unlock()
  72  
  73  	if provider, ok := providerInstances[metadataServiceAddr]; ok {
  74  		return provider
  75  	}
  76  
  77  	provider := metadataServiceCredentialProvider{
  78  		metadataServiceAddr: metadataServiceAddr,
  79  		client: retryablehttp.Client{
  80  			HTTPClient: &http.Client{
  81  				Transport: &http.Transport{
  82  					DialContext: (&net.Dialer{
  83  						Timeout:   time.Second, // One second should be enough for localhost connection.
  84  						KeepAlive: -1,          // No keep alive. Near token per hour requested.
  85  					}).DialContext,
  86  					ResponseHeaderTimeout: 5 * time.Second, // Prevent hanging after successful request write.
  87  				},
  88  			},
  89  			RetryMax:   5,
  90  			CheckRetry: retryablehttp.DefaultRetryPolicy,
  91  			Backoff:    retryablehttp.DefaultBackoff,
  92  		},
  93  		refreshStop:   make(chan struct{}),
  94  		refreshTicker: time.NewTicker(metadataTokenRefreshInterval),
  95  		logger:        zap.NewNop(),
  96  	}
  97  	go provider.tokenRefreshLoop()
  98  
  99  	providerInstances[metadataServiceAddr] = &provider
 100  
 101  	return &provider
 102  }
 103  
 104  func (c *metadataServiceCredentialProvider) InjectLogger(logger *zap.Logger) {
 105  	c.logger = logger
 106  }
 107  
 108  func (c *metadataServiceCredentialProvider) tokenRefreshLoop() {
 109  	for {
 110  		select {
 111  		case <-c.refreshStop:
 112  			c.logger.Info("Token refresh loop stopped")
 113  			c.refreshTicker.Stop()
 114  			return
 115  		case <-c.refreshTicker.C:
 116  			c.logger.Debug("Starting token refresh")
 117  			c.refreshTokenWithTimeout()
 118  		}
 119  	}
 120  }
 121  
 122  func (c *metadataServiceCredentialProvider) refreshTokenWithTimeout() {
 123  	refreshContext, cancel := context.WithTimeout(context.Background(), metadataIamTokenRefreshTimeout)
 124  	defer cancel()
 125  
 126  	if err := c.refreshToken(refreshContext); err != nil {
 127  		c.logger.Error("Failed to refresh token", zap.Error(err))
 128  	}
 129  }
 130  
 131  func (c *metadataServiceCredentialProvider) refreshToken(ctx context.Context) error {
 132  	c.refreshMutex.Lock()
 133  	defer c.refreshMutex.Unlock()
 134  
 135  	if c.recentlyRefreshedAndValid() {
 136  		c.logger.Debug("Token is still valid, skipping refresh")
 137  		return nil
 138  	}
 139  
 140  	c.logger.Debug("Getting new IAM token")
 141  	token, actualExpiration, err := c.iamToken(ctx)
 142  	if err != nil {
 143  		return fmt.Errorf("failed to get compute instance service account token from instance metadata service: GET %s: %w", c.url(), err)
 144  	}
 145  
 146  	c.currentTokenMutex.Lock()
 147  	defer c.currentTokenMutex.Unlock()
 148  
 149  	c.currentTokenRealExpiration = actualExpiration
 150  	c.currentToken = token
 151  	c.lastRefreshTime = time.Now()
 152  
 153  	c.logger.Info("Token refreshed successfully",
 154  		zap.Time("expiresAt", actualExpiration),
 155  		zap.Time("refreshedAt", c.lastRefreshTime))
 156  
 157  	return nil
 158  }
 159  
 160  func (c *metadataServiceCredentialProvider) recentlyRefreshedAndValid() bool {
 161  	if time.Since(c.lastRefreshTime) < metadataTokenCappedExpirationDuration {
 162  		_, err := c.getValidCachedToken()
 163  		return err == nil
 164  	}
 165  
 166  	return false
 167  }
 168  
 169  func (c *metadataServiceCredentialProvider) iamToken(ctx context.Context) (string, time.Time, error) {
 170  	req, err := retryablehttp.NewRequest("GET", c.url(), nil)
 171  	if err != nil {
 172  		return "", time.Time{}, fmt.Errorf("request make failed: %w", err)
 173  	}
 174  	req.Header.Set("Metadata-Flavor", "Google")
 175  	reqDump, _ := httputil.DumpRequestOut(req.Request, false)
 176  	c.logger.Debug("Requesting instance SA token", zap.String("request", string(reqDump)))
 177  
 178  	resp, err := c.client.Do(req.WithContext(ctx))
 179  	if err != nil {
 180  		c.logger.Error("Metadata service call failed", zap.Error(err))
 181  		return "", time.Time{}, fmt.Errorf("compute instance metadata service call failed.\n" +
 182  			"Are you inside compute instance?\n" +
 183  			"Details")
 184  	}
 185  
 186  	defer resp.Body.Close()
 187  	respDump, _ := httputil.DumpResponse(resp, false)
 188  	c.logger.Debug("Received metadata service response", zap.String("response", string(respDump)))
 189  
 190  	if resp.StatusCode == http.StatusNotFound {
 191  		c.logger.Error("Service account not found")
 192  		return "", time.Time{}, fmt.Errorf("%s.\n"+
 193  			"Is this compute instance running using Service Account? That is, Instance.service_account_id should not be empty.",
 194  			resp.Status)
 195  	}
 196  
 197  	body, err := io.ReadAll(resp.Body)
 198  
 199  	if resp.StatusCode != http.StatusOK {
 200  		if err != nil {
 201  			body = []byte(fmt.Sprintf("Failed response body read failed: %s", err.Error()))
 202  		}
 203  
 204  		c.logger.Error("Failed to get SA token",
 205  			zap.String("status", resp.Status),
 206  			zap.String("body", string(body)))
 207  
 208  		return "", time.Time{}, fmt.Errorf("%s", resp.Status)
 209  	}
 210  
 211  	if err != nil {
 212  		return "", time.Time{}, fmt.Errorf("response read failed: %s", err)
 213  	}
 214  
 215  	var tokenResponse struct {
 216  		AccessToken string `json:"access_token"`
 217  		ExpiresIn   int64  `json:"expires_in"`
 218  		TokenType   string `json:"token_type"`
 219  	}
 220  
 221  	err = json.Unmarshal(body, &tokenResponse)
 222  	if err != nil {
 223  		c.logger.Error("Failed to unmarshal token response",
 224  			zap.Error(err))
 225  		return "", time.Time{}, fmt.Errorf("body unmarshal failed: %w", err)
 226  	}
 227  
 228  	actualExpirationTime := time.Now().Add(time.Duration(tokenResponse.ExpiresIn) * time.Second)
 229  	c.logger.Debug("Successfully parsed token response",
 230  		zap.Time("expiresAt", actualExpirationTime),
 231  		zap.String("tokenType", tokenResponse.TokenType))
 232  
 233  	return tokenResponse.AccessToken, actualExpirationTime, nil
 234  }
 235  
 236  func (c *metadataServiceCredentialProvider) url() string {
 237  	return fmt.Sprintf("http://%s/computeMetadata/v1/instance/service-accounts/default/token", c.metadataServiceAddr)
 238  }
 239  
// Addr returns the metadata service address this provider was created with.
func (c *metadataServiceCredentialProvider) Addr() string {
	return c.metadataServiceAddr
}
 243  
 244  func (c *metadataServiceCredentialProvider) Available(ctx context.Context) bool {
 245  	_, err := c.getValidCachedToken()
 246  	if err == nil {
 247  		c.logger.Debug("Cached token is available and valid")
 248  		return true
 249  	}
 250  
 251  	c.logger.Debug("Checking metadata service availability")
 252  	dialer := net.Dialer{Timeout: 50 * time.Millisecond}
 253  
 254  	conn, err := dialer.Dial("tcp", c.metadataServiceAddr)
 255  	if err != nil {
 256  		c.logger.Error("Failed to connect to metadata service", zap.Error(err))
 257  		return false
 258  	}
 259  
 260  	_ = conn.Close()
 261  
 262  	return c.refreshToken(ctx) == nil
 263  }
 264  
 265  func (c *metadataServiceCredentialProvider) getValidCachedToken() (string, error) {
 266  	c.currentTokenMutex.RLock()
 267  	defer c.currentTokenMutex.RUnlock()
 268  
 269  	if c.currentToken != "" && !c.currentTokenRealExpiration.IsZero() {
 270  		if c.currentTokenRealExpiration.After(time.Now().Add(-metadataIamTokenValidityThreshold)) {
 271  			return c.currentToken, nil
 272  		} else {
 273  			c.logger.Debug("Cached token is expired",
 274  				zap.Time("expiresAt", c.currentTokenRealExpiration))
 275  			return "", errors.New("current token is expired")
 276  		}
 277  	}
 278  
 279  	c.logger.Debug("No cached token available")
 280  	return "", errors.New("current token is unavailable")
 281  }
 282  
 283  func (c *metadataServiceCredentialProvider) IAMToken(ctx context.Context) (*CredentialsToken, error) {
 284  	_, err := c.getValidCachedToken()
 285  	if err == nil {
 286  		c.logger.Debug("Using cached token")
 287  		return c.toTokenResponse(), nil
 288  	}
 289  
 290  	c.logger.Debug("Cached token invalid or expired, refreshing token")
 291  	// It is expected that the token is updated at the background, but we do this as the last resort measure here
 292  	// (basically, the legacy behavior reproduction)
 293  	err = c.refreshToken(ctx)
 294  	if err != nil {
 295  		c.logger.Error("Failed to refresh token", zap.Error(err))
 296  		return nil, err
 297  	}
 298  
 299  	return c.toTokenResponse(), nil
 300  }
 301  
 302  func (c *metadataServiceCredentialProvider) toTokenResponse() *CredentialsToken {
 303  	c.currentTokenMutex.RLock()
 304  	defer c.currentTokenMutex.RUnlock()
 305  
 306  	expiresAt := getCappedExpiresAt(c.currentTokenRealExpiration)
 307  	c.logger.Debug("Creating token response", zap.Time("expiresAt", expiresAt))
 308  
 309  	return &CredentialsToken{
 310  		Token:     c.currentToken,
 311  		ExpiresAt: expiresAt,
 312  	}
 313  }
 314  
 315  // getCappedExpiresAt limits the token expiration duration by the metadataTokenCappedExpirationDuration to handle the
 316  // following use-case: instance sa is stripped of all permissions (or even worse, deleted), instance is updated to use
 317  // another sa with valid permissions; however, since the token is not expired it is still used for subsequent requests
 318  // resulting in Permission Denied. Shorter expiration time is a workaround
 319  func getCappedExpiresAt(actualExpirationTime time.Time) time.Time {
 320  	result := time.Now()
 321  	if actualExpirationTime.Sub(result) > metadataTokenCappedExpirationDuration {
 322  		result = result.Add(metadataTokenCappedExpirationDuration)
 323  	}
 324  
 325  	return result
 326  }
 327  
// YandexCloudAPICredentials has an empty body.
// NOTE(review): presumably a marker method required by the credentials interfaces - confirm against their
// definition.
func (c *metadataServiceCredentialProvider) YandexCloudAPICredentials() {}
 329  
 330  // GetMetadataServiceAddr returns the address of Metadata Service, gets the value from InstanceMetadataOverrideEnvVar
 331  // env variable if it is set, otherwise uses the default address from InstanceMetadataAddr.
 332  func GetMetadataServiceAddr() string {
 333  	if nonDefaultAddr := os.Getenv(InstanceMetadataOverrideEnvVar); nonDefaultAddr != "" {
 334  		return nonDefaultAddr
 335  	}
 336  
 337  	return InstanceMetadataAddr
 338  }
 339