Skip to content

Commit

Permalink
lakeFS Client from aws role (#7874)
Browse files Browse the repository at this point in the history
  • Loading branch information
guy-har authored Jun 19, 2024
1 parent 214951f commit 99458b8
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 4 deletions.
134 changes: 133 additions & 1 deletion clients/python-wrapper/lakefs/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,22 @@

from __future__ import annotations

from typing import Optional
import base64
import json
from threading import Lock
from typing import Optional
from urllib.parse import urlparse, parse_qs

import lakefs_sdk
from lakefs_sdk import ExternalLoginInformation
from lakefs_sdk.client import LakeFSClient

from lakefs.config import ClientConfig
from lakefs.exceptions import NotAuthorizedException, ServerException, api_exception_handler
from lakefs.models import ServerStorageConfiguration

DEFAULT_REGION = 'us-east-1'


class ServerConfiguration:
"""
Expand Down Expand Up @@ -106,6 +112,132 @@ def version(self) -> str:
return self._server_conf.version


def _extract_region_from_endpoint(endpoint):
"""
Extract the region name from an STS endpoint URL.
for example: https://sts.eu-central-1.amazonaws.com/ -> eu-central-1
and for example: https://sts.amazonaws.com/ -> DEFAULT_REGION
:param endpoint: The endpoint URL of the STS client.
:return: The region name extracted from the endpoint URL.
"""

parts = endpoint.split('.')
if len(parts) == 4:
return parts[1]
if len(parts) > 4:
return parts[2]
return DEFAULT_REGION


def _get_identity_token(
        session: 'boto3.Session',
        lakefs_host: str,
        additional_headers: dict[str, str],
        presign_expiry
) -> str:
    """
    Build the identity token that lakeFS expects for AWS-based external principal login.

    An STS client is created from the session, a presigned `GetCallerIdentity` POST request is
    generated for its endpoint, and the signing values found in the presigned URL's query string
    are packed into a JSON object that is returned base64-encoded.

    :param session: The boto3 session used to create the STS client and to obtain credentials.
    :param lakefs_host: The lakeFS host name; sent as the `X-LakeFS-Server-ID` header
        when `additional_headers` is None.
    :param additional_headers: Headers to sign with the request. When None, only
        `X-LakeFS-Server-ID` is signed.
    :param presign_expiry: Expiry passed to the signer as `expires_in` for the presigned URL.
    :return: A base64-encoded JSON string containing the required authentication information.
    :raises KeyError: If the presigned URL lacks one of the mandatory query parameters.
    """

    # botocore is imported lazily: it is only available when the aws-iam extra is installed
    from botocore.client import Config  # pylint: disable=import-outside-toplevel, import-error
    from botocore.signers import RequestSigner  # pylint: disable=import-outside-toplevel, import-error

    sts = session.client('sts', config=Config(signature_version='v4'))
    sts_endpoint = sts.meta.endpoint_url
    sts_service_id = sts.meta.service_model.service_id
    sts_region = _extract_region_from_endpoint(sts_endpoint)

    # A RequestSigner is used directly because the presigned URL generated by the
    # STS client does not support additional headers.
    request_signer = RequestSigner(
        sts_service_id,
        sts_region,
        'sts',
        'v4',
        session.get_credentials(),
        session.events,
    )

    headers_to_sign = {'X-LakeFS-Server-ID': lakefs_host} if additional_headers is None else additional_headers
    request_to_sign = {
        'method': 'POST',
        'url': f"{sts_endpoint}/?Action=GetCallerIdentity&Version=2011-06-15",
        'body': {},
        'headers': headers_to_sign,
        'context': {},
    }
    signed_url = urlparse(
        request_signer.generate_presigned_url(
            request_to_sign,
            region_name=sts_region,
            expires_in=presign_expiry,
            operation_name='',
        )
    )
    query = parse_qs(signed_url.query)

    def required(name):
        # parse_qs maps every key to a list of values; the first one is the value we need
        return query[name][0]

    # Key order is kept stable because it determines the serialized token
    token_fields = {
        "method": "POST",
        "host": signed_url.hostname,
        "region": sts_region,
        "action": required('Action'),
        "date": required('X-Amz-Date'),
        "expiration_duration": required('X-Amz-Expires'),
        "access_key_id": required('X-Amz-Credential').split('/')[0],
        "signature": required('X-Amz-Signature'),
        "signed_headers": query.get('X-Amz-SignedHeaders', [''])[0].split(';'),
        "version": required('Version'),
        "algorithm": required('X-Amz-Algorithm'),
        "security_token": query.get('X-Amz-Security-Token', [None])[0],
    }

    serialized = json.dumps(token_fields).encode('utf-8')
    return base64.b64encode(serialized).decode('utf-8')


def from_aws_role(
        session: 'boto3.Session',
        ttl_seconds: int = 3600,
        presigned_ttl: int = 60,
        additional_headers: dict[str, str] = None,
        **kwargs) -> Client:
    """
    Create a lakeFS client authenticated through an AWS role.

    A client is built from `kwargs`, an identity token is generated from the boto3 session,
    and the token is exchanged with lakeFS through the external principal login API. The
    resulting lakeFS token is stored on the client's configuration as its access token.

    :param session: The boto3 session.
    :param ttl_seconds: The time-to-live for the generated lakeFS token in seconds. The default value is 3600 seconds.
    :param presigned_ttl: The time-to-live for the presigned URL in seconds. The default value is 60 seconds.
    :param additional_headers: Additional headers to include in the presigned URL.
    :param kwargs: The arguments to pass to the client.
    :return: A lakeFS client.
    """

    lakefs_client = Client(**kwargs)
    server_host = urlparse(lakefs_client.config.host).hostname

    token = _get_identity_token(
        session,
        server_host,
        presign_expiry=presigned_ttl,
        additional_headers=additional_headers,
    )
    login_request = ExternalLoginInformation(
        token_expiration_duration=ttl_seconds,
        identity_request={"identity_token": token},
    )

    # Only the API call is wrapped by the exception handler
    with api_exception_handler():
        login_response = lakefs_client.sdk_client.auth_api.external_principal_login(login_request)

    lakefs_client.config.access_token = login_response.token
    return lakefs_client


def from_web_identity(code: str, state: str, redirect_uri: str, ttl_seconds: int = 3600, **kwargs) -> Client:
"""
Authenticate against lakeFS using a code received from an identity provider
Expand Down
5 changes: 4 additions & 1 deletion clients/python-wrapper/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,8 @@
include_package_data=True,
license="Apache 2.0",
long_description=long_description,
long_description_content_type='text/markdown'
long_description_content_type='text/markdown',
extras_require={
'aws-iam': ["boto3 >= 1.26.0"],
},
)
3 changes: 3 additions & 0 deletions docs/integrations/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ In case no authentication parameters exist, it is also possible to explicitly cr

Here's how to instantiate a client:

{: .note }
See [here](../reference/security/external-principals-aws.md#login-with-python) for instructions on how to log in with Python using your AWS role. This is applicable for enterprise users.

```python
from lakefs.client import Client

Expand Down
34 changes: 32 additions & 2 deletions docs/reference/security/external-principals-aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,41 @@ for p in resp.results:
# do something
```


## Get lakeFS API Token

The login to lakeFS is done by calling the [login API][login-api] with the `GetCallerIdentity` request signed by the client.
Currently, the login operation is supported out of the box in [lakeFS Hadoop FileSystem][lakefs-hadoopfs] version 0.2.4, see [Spark usage][lakefs-spark].
Other clients (i.e HTTP, Python etc) can use the login endpoint to authenticate to lakeFS but, you will have to build the request input.
Currently, the login operation is supported out of the box in:
- [lakeFS Hadoop FileSystem][lakefs-hadoopfs] version 0.2.4, see [Spark usage][lakefs-spark]
- [python](#login-with-python)

For other use cases, authenticate to lakeFS via the login endpoint; this requires building the request input yourself.

## Login with python

### prerequisites

lakeFS requires additional python packages to be installed in order to generate a lakeFS client with the assumed role.
To install the required packages, run the following command:

```sh
pip install lakefs[aws-iam]
```

In order to generate a lakeFS client with the assumed role, initiate a boto3 session with the desired role and pass it to `lakefs.client.from_aws_role`, which signs the `GetCallerIdentity` request on your behalf:


```python
import lakefs
import boto3
session = boto3.Session()
myclient = lakefs.client.from_aws_role(session=session, ttl_seconds = 7200, host="<lakefs-host>")
# list repositories
repos = lakefs.repositories(client=myclient)
for r in repos:
print(r)
```


[external-principal-admin]: {% link reference/cli.md %}#external
Expand Down

0 comments on commit 99458b8

Please sign in to comment.