LBED: zETL OpenSearch lab #107

Merged on Mar 29, 2024 (33 commits)

Commits
2e6ab87
Added new lab - DynamoDB to OpenSearch zero-ETL
terhunej Feb 25, 2024
e1c317e
Update ddb-os-zetl-chapter01.en.md
terhunej Feb 25, 2024
de8a050
Update ddb-os-zetl2.jpg
terhunej Feb 25, 2024
1559bc7
fixed images, changed numbering
terhunej Feb 27, 2024
be42adc
fixed numbering
terhunej Feb 27, 2024
b337268
fixed numbering
terhunej Feb 27, 2024
eba531e
fixed numbering
terhunej Feb 27, 2024
1245e55
fixed numbering
terhunej Feb 27, 2024
890948b
fixed numbering
terhunej Feb 27, 2024
3bba7a2
Update ddb-os-zetl-chapter08.en.md
terhunej Feb 27, 2024
13e39d6
Merge branch 'aws-samples:master' into opensearch-lab
terhunej Mar 16, 2024
1609d5d
Formatting, Secrets Manager, Context
terhunej Mar 16, 2024
c51b667
warnings
terhunej Mar 16, 2024
3b2aed4
Split out non-critical package install to separate line
switch180 Mar 21, 2024
73b506f
Merge branch 'master' into opensearch-lab
switch180 Mar 21, 2024
0c635e1
Moved OS pipeline code into repo, building ZIP on deploy
switch180 Mar 21, 2024
0c0ab5f
Fixing zETL refs to S3 and in build system
switch180 Mar 21, 2024
7b0af78
Fixed broken image links
switch180 Mar 21, 2024
a5b1efe
Merge branch 'master' into opensearch-lab
switch180 Mar 22, 2024
b413847
Adding MIT license
switch180 Mar 22, 2024
2054ff6
Fixing formatting and images. Modified some steps.
switch180 Mar 22, 2024
eee6572
Updating w latest template view on stack create
switch180 Mar 22, 2024
8235b07
Implemented Requested Changes
terhunej Mar 28, 2024
4d13ed1
Fixed links, ordering
switch180 Mar 28, 2024
5ed047b
Re-ordered workshops
switch180 Mar 28, 2024
75c36c3
Moving LETL/zETL assets to own folder
switch180 Mar 28, 2024
3a3fb38
LEDA updates for new IAM console and region change to PDX
switch180 Mar 29, 2024
bba3885
LETL -> LBED and many reworks of the guide
switch180 Mar 29, 2024
1b5a38a
Finishing touches
switch180 Mar 29, 2024
d1d964b
Updating all labs getting started with LBED code
switch180 Mar 29, 2024
2a93b79
Swapping with correct diagram for service-config
switch180 Mar 29, 2024
2013de4
Removing extra C9 instructions
switch180 Mar 29, 2024
bad51ea
IDK
switch180 Mar 29, 2024
11 changes: 10 additions & 1 deletion .github/scripts/build-assets.py
@@ -30,7 +30,9 @@
'design-patterns/cloudformation/UserDataC9.sh',
'event-driven/event-driven-cfn.yaml',
'static/files/hands-on-labs/migration-env-setup.yaml',
'static/files/hands-on-labs/migration-dms-setup.yaml']
'static/files/hands-on-labs/migration-dms-setup.yaml',
'static/files/dynamodb-opensearch-zetl/dynamodb-opensearch-setup.yaml'
]
for inp_file in data_files:
src_file = os.path.join(pkg_root, inp_file)
head, tail = ntpath.split(src_file)
@@ -60,6 +62,13 @@
workshop_zip.write(scenario2)
shutil.move(os.path.join(os.getcwd(), 'scenario-solutions.zip'), os.path.join(dest_root, 'assets', 'scenario-solutions.zip'))

#Create LHOL zETL ZIP
os.chdir(os.path.join(pkg_root, 'static', 'files', 'dynamodb-opensearch-zetl'))
with ZipFile('OpenSearchPipeline.zip', 'w') as workshop_zip:
for pyscript in glob.glob('./OpenSearchPipeline/*'):
workshop_zip.write(pyscript)
shutil.move(os.path.join(os.getcwd(), 'OpenSearchPipeline.zip'), os.path.join(dest_root, 'assets', 'OpenSearchPipeline.zip'))


#Create Game-Player-Data Python Scripts ZIP
os.chdir(os.path.join(pkg_root, 'game-player-data'))
4 changes: 4 additions & 0 deletions .github/workflows/main.yml
@@ -53,6 +53,10 @@ jobs:
run: aws s3api put-object-acl --grant-read uri=http://acs.amazonaws.com/groups/global/AllUsers --bucket $STEP_S3_BUCKET --key assets/migration-env-setup.yaml
- name: SetS3AclLHOLDMS
run: aws s3api put-object-acl --grant-read uri=http://acs.amazonaws.com/groups/global/AllUsers --bucket $STEP_S3_BUCKET --key assets/migration-dms-setup.yaml
- name: SetS3AclLHOLzETL
run: aws s3api put-object-acl --grant-read uri=http://acs.amazonaws.com/groups/global/AllUsers --bucket $STEP_S3_BUCKET --key assets/OpenSearchPipeline.zip
- name: SetS3AclLHOLzETLYaml
run: aws s3api put-object-acl --grant-read uri=http://acs.amazonaws.com/groups/global/AllUsers --bucket $STEP_S3_BUCKET --key assets/dynamodb-opensearch-setup.yaml
- name: SetS3AclLGME1
run: aws s3api put-object-acl --grant-read uri=http://acs.amazonaws.com/groups/global/AllUsers --bucket $STEP_S3_BUCKET --key assets/battle-royale.zip
- name: SetS3AclLMR
2 changes: 1 addition & 1 deletion content/change-data-capture/clean-up/index.en.md
@@ -9,7 +9,7 @@ Congratulations! You have made it to the end of the workshop.

In this workshop you explored capturing item-level changes on a DynamoDB table using DynamoDB Streams and Kinesis Data Streams. In this instance, you wrote the previous version of updated items to a different DynamoDB table. By applying these same techniques, you can build complex event-driven solutions that are triggered by changes to items you have stored in DynamoDB.

If you used an account provided by Event Engine, you do not need to do any cleanup. The account terminates when the event is over.
If you used an account provided by Workshop Studio, you do not need to do any cleanup. The account terminates when the event is over.

If you used your own account, please remove the following resources:

2 changes: 1 addition & 1 deletion content/change-data-capture/index.en.md
@@ -2,7 +2,7 @@
title: "LCDC: Change Data Capture for Amazon DynamoDB"
chapter: true
description: "200 level: Hands-on exercises with DynamoDB Streams and Kinesis Data Streams with Kinesis Analytics."
weight: 2
weight: 40
---
In this workshop, you will learn how to perform change data capture of item level changes on DynamoDB tables using [Amazon DynamoDB Streams](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Streams.html) and [Amazon Kinesis Data Streams](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/kds.html). This technique allows you to develop event-driven solutions that are initiated by alterations made to item-level data stored in DynamoDB.

2 changes: 1 addition & 1 deletion content/change-data-capture/setup/index.en.md
@@ -16,7 +16,7 @@ To run this lab, you will need an AWS account, and a user identity with access t
* Amazon Simple Queue Service
* AWS Cloud9 Environment

You can use your own account, or an account provided through Event Engine as part of an AWS organized workshop. Using an account provided by Event Engine is the easier path, as you will have full access to all AWS services, and the account will terminate automatically when the event is over.
You can use your own account, or an account provided through Workshop Studio as part of an AWS organized workshop. Using an account provided by Workshop Studio is the easier path, as you will have full access to all AWS services, and the account will terminate automatically when the event is over.

To set up this workshop, choose one of the following links, depending on whether you are:
- :link[Attending an AWS-hosted event (using AWS-provided access codes)]{href="/change-data-capture/setup/aws-ws-event/"}
2 changes: 1 addition & 1 deletion content/design-patterns/index.en.md
@@ -2,7 +2,7 @@
title: "LADV: Advanced Design Patterns for Amazon DynamoDB"
chapter: true
description: "300 level: Hands-on exercise using Python and DynamoDB best practices."
weight: 10
weight: 30
---
In this workshop, you review [Amazon DynamoDB](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Introduction.html) design patterns and best practices to build highly scalable applications that are optimized for performance and cost. This workshop implements these design patterns by using Python scripts. At the end of this workshop, you will have the knowledge to build and monitor DynamoDB applications that can grow to any size and scale.

2 changes: 1 addition & 1 deletion content/design-patterns/setup/user-account.en.md
@@ -5,7 +5,7 @@ weight: 5
chapter: true
---

::alert[These setup instructions are identical for LADV, LHOL, LMR, and LGME - all of which use the same Cloud9 template. Only complete this section once, and only if you're running it on your own account.]{type="warning"}
::alert[These setup instructions are identical for LADV, LHOL, LMR, LBED, and LGME - all of which use the same Cloud9 template. Only complete this section once, and only if you're running it on your own account.]{type="warning"}

::alert[Only complete this section if you are running the workshop on your own. If you are at an AWS hosted event (such as re\:Invent, Immersion Day, etc), go to :link[At an AWS hosted Event]{href="/design-patterns/setup/aws-ws-event"}]

20 changes: 20 additions & 0 deletions content/dynamodb-opensearch-zetl/index.en.md
@@ -0,0 +1,20 @@
---
title: "LBED: DynamoDB GenAI with Amazon Bedrock & Zero-ETL to OpenSearch Integration"
date: 2024-02-23T00:00:00-00:00
weight: 20
chapter: true
description: "In this module you will have a hands on experience setting up DynamoDB zero-ETL integration with Amazon OpenSearch Service."
---

In this module you will get hands-on experience setting up DynamoDB zero-ETL integration with Amazon OpenSearch Service. You will create a pipeline from a DynamoDB table to OpenSearch Service, create an Amazon Bedrock Connector in OpenSearch Service, and query Bedrock using OpenSearch Service as a vector store.
At the end of this lesson, you should feel confident in your ability to integrate DynamoDB with OpenSearch Service to support context-aware reasoning applications.

Pairing Amazon DynamoDB with Amazon OpenSearch Service is a common architecture pattern for applications that need to combine the high scalability and performance of DynamoDB for transactional workloads with the powerful search and analytics capabilities of OpenSearch.

DynamoDB is a NoSQL database designed for high availability, performance, and scalability and focused on key/value operations. OpenSearch Service provides advanced search features such as full-text search, faceted search, and complex querying capabilities. Combined, these two services can satisfy a wide variety of application use cases.

This module will allow you to set up one such use case. DynamoDB will be the source of truth for product catalog information and OpenSearch will provide vector search capabilities to enable Amazon Bedrock (a generative AI service) to make product recommendations.
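
For a concrete sense of the data involved, a product catalog item written to the DynamoDB side might look like the sketch below. The table name and values are hypothetical; only the attribute names (ProductID, ProductName, Category, Description) are taken from the ingest pipelines you'll create later in the lab.

```bash
# Hypothetical example item -- table name and values are illustrative only.
# The attribute names mirror the fields combined by the ingest pipelines later in this lab.
aws dynamodb put-item \
  --table-name ProductDetails \
  --item '{
    "ProductID":   {"S": "P-1001"},
    "ProductName": {"S": "Trail Tent 2P"},
    "Category":    {"S": "Camping"},
    "Description": {"S": "A sturdy two-person tent with a waterproof fly."}
  }'
```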

::alert[_This lab creates OpenSearch Service, DynamoDB, and Secrets Manager resources. If running in your own account, these resources will incur charges of approximately $30 a month. Remember to delete the CloudFormation Stack after completing the lab._]{type="warning"}

![Final Deployment Architecture](/static/images/ddb-os-zetl.png)
7 changes: 7 additions & 0 deletions content/dynamodb-opensearch-zetl/integrations/index.en.md
@@ -0,0 +1,7 @@
---
title: "3. Integrations"
menuTitle: "Integrations"
date: 2024-02-23T00:00:00-00:00
weight: 30
---
In this section, you will configure integrations between services. You'll first set up ML and Pipeline connectors in OpenSearch Service, followed by a zero-ETL connector that moves data written to DynamoDB into OpenSearch. Once these integrations are in place, you'll be able to write records to DynamoDB as your source of truth and automatically have that data available to query in other services.
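
For orientation, the zero-ETL connector is an OpenSearch Ingestion pipeline whose definition names a DynamoDB source and an OpenSearch sink. The lab ships its own pipeline definition (the OpenSearchPipeline.zip asset built by this PR); the sketch below is only an illustration of the general shape, and every ARN, bucket, and index name in it is a placeholder, not a value used by the lab.

```bash
# Illustrative sketch only -- not the pipeline definition shipped with this lab.
# The shape follows the OpenSearch Ingestion (Data Prepper) DynamoDB source;
# all ARNs, bucket names, and index names below are placeholders.
cat > pipeline.yaml <<'EOF'
version: "2"
dynamodb-pipeline:
  source:
    dynamodb:
      tables:
        - table_arn: "arn:aws:dynamodb:us-west-2:111122223333:table/ProductDetails"
          stream:
            start_position: "LATEST"
          export:
            s3_bucket: "example-export-bucket"
      aws:
        sts_role_arn: "arn:aws:iam::111122223333:role/example-pipeline-role"
        region: "us-west-2"
  sink:
    - opensearch:
        hosts: ["https://search-example-domain.us-west-2.es.amazonaws.com"]
        index: "product-details-index"
        aws:
          sts_role_arn: "arn:aws:iam::111122223333:role/example-pipeline-role"
          region: "us-west-2"
EOF
```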
190 changes: 190 additions & 0 deletions content/dynamodb-opensearch-zetl/integrations/os-connectors.en.md
@@ -0,0 +1,190 @@
---
title: "Configure Integrations"
menuTitle: "Load DynamoDB Data"
date: 2024-02-23T00:00:00-00:00
weight: 20
---
In this section you'll configure ML and Pipeline connectors in OpenSearch Service. These configurations are set up through a series of POST and PUT requests that are authenticated with AWS Signature Version 4 (SigV4), the standard authentication mechanism used by AWS services. In most cases an SDK abstracts SigV4 away, but here we will build the requests ourselves with curl.

Building a SigV4-signed request requires a session token, an access key, and a secret access key. You'll first retrieve these from your Cloud9 instance metadata with the provided "credentials.sh" script, which exports the required values to environment variables. In the following steps, you'll also export other values to environment variables to allow for easy substitution into the listed commands.
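
If you're curious what credentials.sh does, a minimal sketch is shown below. It is not the exact script shipped with the lab; it simply illustrates fetching the instance role's temporary credentials from the EC2 instance metadata service and exporting them under the variable names the later commands expect.

```bash
#!/bin/bash
# Minimal sketch of a credentials.sh-style script -- not the exact script the lab provides.
# It reads the instance role's temporary credentials from the EC2 instance metadata
# service (IMDSv2) and exports them under the names used by the curl commands below.
TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \
  -H "X-aws-ec2-metadata-token-ttl-seconds: 300")
ROLE_NAME=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
  http://169.254.169.254/latest/meta-data/iam/security-credentials/)
CREDS=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
  http://169.254.169.254/latest/meta-data/iam/security-credentials/${ROLE_NAME})
export METADATA_AWS_ACCESS_KEY_ID=$(echo "$CREDS" | python3 -c 'import sys,json; print(json.load(sys.stdin)["AccessKeyId"])')
export METADATA_AWS_SECRET_ACCESS_KEY=$(echo "$CREDS" | python3 -c 'import sys,json; print(json.load(sys.stdin)["SecretAccessKey"])')
export METADATA_AWS_SESSION_TOKEN=$(echo "$CREDS" | python3 -c 'import sys,json; print(json.load(sys.stdin)["Token"])')
export METADATA_AWS_REGION=$(curl -s -H "X-aws-ec2-metadata-token: $TOKEN" \
  http://169.254.169.254/latest/meta-data/placement/region)
# METADATA_AWS_ROLE (the role ARN referenced by the ML connector) would also be
# exported here, e.g. from a CloudFormation output or `aws sts get-caller-identity`.
```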

1. Run the credentials.sh script to retrieve and export credentials. These credentials will be used to sign API requests to the OpenSearch cluster. Note the leading "." before "./credentials.sh"; it must be included so that the exported variables are available in the currently running shell.
```bash
. ./credentials.sh
```
1. Next, export an environment variable with the OpenSearch endpoint URL. This URL is listed in the CloudFormation stack's Outputs tab as "OSDomainEndpoint". This variable will be used in subsequent commands.
```bash
export OPENSEARCH_ENDPOINT="https://search-ddb-os-xxxx-xxxxxxxxxxxxx.us-west-2.es.amazonaws.com"
```
1. Execute the following curl command to create the OpenSearch ML model connector.
```bash
curl --request POST \
${OPENSEARCH_ENDPOINT}'/_plugins/_ml/connectors/_create' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"name": "Amazon Bedrock Connector: embedding",
"description": "The connector to bedrock Titan embedding model",
"version": 1,
"protocol": "aws_sigv4",
"parameters": {
"region": "'${METADATA_AWS_REGION}'",
"service_name": "bedrock"
},
"credential": {
"roleArn": "'${METADATA_AWS_ROLE}'"
},
"actions": [
{
"action_type": "predict",
"method": "POST",
"url": "https://bedrock-runtime.'${METADATA_AWS_REGION}'.amazonaws.com/model/amazon.titan-embed-text-v1/invoke",
"headers": {
"content-type": "application/json",
"x-amz-content-sha256": "required"
},
"request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
"pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
"post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n "
}
]
}'
```
1. Note the "connector_id" returned in the previous command. Export it to an environmental variable for convenient substitution in future commands.
```bash
export CONNECTOR_ID='xxxxxxxxxxxxxx'
```
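
If jq is available in your environment (an optional convenience; the lab does not install it for you), you can extract the ID from the saved JSON response instead of copying it by hand. The same trick works for the model group and model IDs in the later steps.

```bash
# Optional: re-run the previous curl command with `-o connector.json` appended,
# then pull connector_id out of the saved response (assumes jq is installed).
export CONNECTOR_ID=$(jq -r '.connector_id' connector.json)
echo "CONNECTOR_ID=${CONNECTOR_ID}"
```
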
1. Run the next curl command to create the model group.
```bash
curl --request POST \
${OPENSEARCH_ENDPOINT}'/_plugins/_ml/model_groups/_register' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"name": "remote_model_group",
"description": "This is an example description"
}'
```
1. Note the "model_group_id" returned in the previous command. Export it to an environmental variable for later substitution.
```bash
export MODEL_GROUP_ID='xxxxxxxxxxxxx'
```
1. The next curl command registers a model that uses the connector and belongs to the model group.
```bash
curl --request POST \
${OPENSEARCH_ENDPOINT}'/_plugins/_ml/models/_register' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"name": "Bedrock embedding model",
"function_name": "remote",
"model_group_id": "'${MODEL_GROUP_ID}'",
"description": "embedding model",
"connector_id": "'${CONNECTOR_ID}'"
}'
```
1. Note the "model_id" and export it.
```bash
export MODEL_ID='xxxxxxxxxxxxx'
```
1. Run the following command to verify that you have successfully exported the connector, model group, and model IDs.
```bash
echo -e "CONNECTOR_ID=${CONNECTOR_ID}\nMODEL_GROUP_ID=${MODEL_GROUP_ID}\nMODEL_ID=${MODEL_ID}"
```
1. Next, we'll deploy the model with the following curl command.
```bash
curl --request POST \
${OPENSEARCH_ENDPOINT}'/_plugins/_ml/models/'${MODEL_ID}'/_deploy' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}"
```

With the model deployed, OpenSearch can now use Bedrock's Titan embedding model for processing text. An embedding model is a type of machine learning model that transforms high-dimensional data (like text or images) into lower-dimensional vectors, known as embeddings. These vectors capture the semantic or contextual relationships between the data points in a more compact, dense representation.

The embeddings represent the semantic meaning of the input data, in this case product descriptions. Words with similar meanings are represented by vectors that are close to each other in the vector space. For example, the vectors for "sturdy" and "strong" would be closer to each other than to "warm".
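
As an optional aside, you can also invoke the Titan embedding model directly with the AWS CLI to see the raw vector the connector will return to OpenSearch. This is not a lab step; it assumes your Cloud9 role has permission to invoke the model, and the request body simply mirrors the `inputText` shape used in the connector definition above.

```bash
# Optional: call the Titan embedding model directly through the Bedrock runtime.
# --cli-binary-format is needed with AWS CLI v2 to pass raw JSON as the body.
aws bedrock-runtime invoke-model \
  --region ${METADATA_AWS_REGION} \
  --model-id amazon.titan-embed-text-v1 \
  --cli-binary-format raw-in-base64-out \
  --body '{"inputText": "A sturdy two-person tent with a waterproof fly."}' \
  embedding.json

# The response contains an "embedding" array of floats; print its length.
python3 -c 'import json; print(len(json.load(open("embedding.json"))["embedding"]))'
```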

1. Now we can test the model. If you receive results back with a "200" status code, everything is working properly.
```bash
curl --request POST \
${OPENSEARCH_ENDPOINT}'/_plugins/_ml/models/'${MODEL_ID}'/_predict' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"parameters": {
"inputText": "What is the meaning of life?"
}
}'
```
1. Next, we'll create the Details table mapping pipeline.
```bash
curl --request PUT \
${OPENSEARCH_ENDPOINT}'/_ingest/pipeline/product-en-nlp-ingest-pipeline' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"description": "A text embedding pipeline",
"processors": [
{
"script": {
"source": "def combined_field = \"ProductID: \" + ctx.ProductID + \", Description: \" + ctx.Description + \", ProductName: \" + ctx.ProductName + \", Category: \" + ctx.Category; ctx.combined_field = combined_field;"
}
},
{
"text_embedding": {
"model_id": "'${MODEL_ID}'",
"field_map": {
"combined_field": "product_embedding"
}
}
}
]
}'
```
1. Next, create the Reviews table mapping pipeline. We won't use it in this version of the lab, but in a real system you would keep separate embeddings indexes for different queries.
```bash
curl --request PUT \
${OPENSEARCH_ENDPOINT}'/_ingest/pipeline/product-reviews-nlp-ingest-pipeline' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"description": "A text embedding pipeline",
"processors": [
{
"script": {
"source": "def combined_field = \"ProductID: \" + ctx.ProductID + \", ProductName: \" + ctx.ProductName + \", Comment: \" + ctx.Comment + \", Timestamp: \" + ctx.Timestamp; ctx.combined_field = combined_field;"
}
},
{
"text_embedding": {
"model_id": "m6jIgowBXLzE-9O0CcNs",
"field_map": {
"combined_field": "product_reviews_embedding"
}
}
}
]
}'
```

These pipelines allow OpenSearch to preprocess and enrich data as it is written to the index by adding embeddings through the Bedrock connector.
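
As an optional check (not part of the lab's numbered steps), you can exercise a pipeline without writing anything to an index by calling the ingest simulate API with the same SigV4 curl pattern used above; the sample document below reuses the field names the Details pipeline expects.

```bash
# Optional: simulate the Details pipeline against a sample document to confirm
# that the combined_field script and the text_embedding processor both run.
curl --request POST \
${OPENSEARCH_ENDPOINT}'/_ingest/pipeline/product-en-nlp-ingest-pipeline/_simulate' \
--header 'Content-Type: application/json' \
--header 'Accept: application/json' \
--header "x-amz-security-token: ${METADATA_AWS_SESSION_TOKEN}" \
--aws-sigv4 aws:amz:${METADATA_AWS_REGION}:es \
--user "${METADATA_AWS_ACCESS_KEY_ID}:${METADATA_AWS_SECRET_ACCESS_KEY}" \
--data-raw '{
"docs": [
{
"_source": {
"ProductID": "P-1001",
"ProductName": "Trail Tent 2P",
"Category": "Camping",
"Description": "A sturdy two-person tent with a waterproof fly."
}
}
]
}'
```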