diff --git a/.github/action.yml b/.github/action.yml new file mode 100644 index 00000000..b3f35b13 --- /dev/null +++ b/.github/action.yml @@ -0,0 +1,37 @@ +name: "Init Environment" +description: "Initialize environment for tests" +runs: + using: "composite" + steps: + - name: Checkout actions + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install and configure Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root --with test --with dev --all-extras + shell: bash + + - name: Activate venv + run: | + source .venv/bin/activate + echo PATH=$PATH >> $GITHUB_ENV + shell: bash \ No newline at end of file diff --git a/.github/workflows/aws.yml b/.github/workflows/aws.yml new file mode 100644 index 00000000..369aa43d --- /dev/null +++ b/.github/workflows/aws.yml @@ -0,0 +1,94 @@ +# This workflow will build and push a new container image to Amazon ECR, +# and then will deploy a new task definition to Amazon ECS, when there is a push to the "master" branch. +# +# To use this workflow, you will need to complete the following set-up steps: +# +# 1. Create an ECR repository to store your images. +# For example: `aws ecr create-repository --repository-name my-ecr-repo --region us-east-2`. +# Replace the value of the `ECR_REPOSITORY` environment variable in the workflow below with your repository's name. +# Replace the value of the `AWS_REGION` environment variable in the workflow below with your repository's region. +# +# 2. Create an ECS task definition, an ECS cluster, and an ECS service. +# For example, follow the Getting Started guide on the ECS console: +# https://us-east-2.console.aws.amazon.com/ecs/home?region=us-east-2#/firstRun +# Replace the value of the `ECS_SERVICE` environment variable in the workflow below with the name you set for the Amazon ECS service. +# Replace the value of the `ECS_CLUSTER` environment variable in the workflow below with the name you set for the cluster. +# +# 3. Store your ECS task definition as a JSON file in your repository. +# The format should follow the output of `aws ecs register-task-definition --generate-cli-skeleton`. +# Replace the value of the `ECS_TASK_DEFINITION` environment variable in the workflow below with the path to the JSON file. +# Replace the value of the `CONTAINER_NAME` environment variable in the workflow below with the name of the container +# in the `containerDefinitions` section of the task definition. +# +# 4. Store an IAM user access key in GitHub Actions secrets named `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. +# See the documentation for each action used below for the recommended IAM policies for this IAM user, +# and best practices on handling the access key credentials. + +name: Deploy to Amazon ECS + +on: + push: + branches: [ "master" ] + +env: + AWS_REGION: MY_AWS_REGION # set this to your preferred AWS region, e.g. 
us-west-1 + ECR_REPOSITORY: MY_ECR_REPOSITORY # set this to your Amazon ECR repository name + ECS_SERVICE: MY_ECS_SERVICE # set this to your Amazon ECS service name + ECS_CLUSTER: MY_ECS_CLUSTER # set this to your Amazon ECS cluster name + ECS_TASK_DEFINITION: MY_ECS_TASK_DEFINITION # set this to the path to your Amazon ECS task definition + # file, e.g. .aws/task-definition.json + CONTAINER_NAME: MY_CONTAINER_NAME # set this to the name of the container in the + # containerDefinitions section of your task definition + +permissions: + contents: read + +jobs: + deploy: + name: Deploy + runs-on: ubuntu-latest + environment: production + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build, tag, and push image to Amazon ECR + id: build-image + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + IMAGE_TAG: ${{ github.sha }} + run: | + # Build a docker container and + # push it to ECR so that it can + # be deployed to ECS. + docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . + docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG + echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT + + - name: Fill in the new image ID in the Amazon ECS task definition + id: task-def + uses: aws-actions/amazon-ecs-render-task-definition@v1 + with: + task-definition: ${{ env.ECS_TASK_DEFINITION }} + container-name: ${{ env.CONTAINER_NAME }} + image: ${{ steps.build-image.outputs.image }} + + - name: Deploy Amazon ECS task definition + uses: aws-actions/amazon-ecs-deploy-task-definition@v1 + with: + task-definition: ${{ steps.task-def.outputs.task-definition }} + service: ${{ env.ECS_SERVICE }} + cluster: ${{ env.ECS_CLUSTER }} + wait-for-service-stability: true diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml new file mode 100644 index 00000000..aeb83a65 --- /dev/null +++ b/.github/workflows/bandit.yml @@ -0,0 +1,52 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# Bandit is a security linter designed to find common security issues in Python code. +# This action will run Bandit on your codebase. +# The results of the scan will be found under the Security tab of your repository. 
+ +# https://github.com/marketplace/actions/bandit-scan is ISC licensed, by abirismyname +# https://pypi.org/project/bandit/ is Apache v2.0 licensed, by PyCQA + +name: Bandit +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + schedule: + - cron: '42 5 * * 0' + +jobs: + bandit: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Bandit Scan + uses: shundor/python-bandit-scan@9cc5aa4a006482b8a7f91134412df6772dbda22c + with: # optional arguments + # exit with 0, even with results found + exit_zero: true # optional, default is DEFAULT + # Github token of the repository (automatically created by Github) + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information. + # File or directory to run bandit on + # path: # optional, default is . + # Report only issues of a given severity level or higher. Can be LOW, MEDIUM or HIGH. Default is UNDEFINED (everything) + # level: # optional, default is UNDEFINED + # Report only issues of a given confidence level or higher. Can be LOW, MEDIUM or HIGH. Default is UNDEFINED (everything) + # confidence: # optional, default is UNDEFINED + # comma-separated list of paths (glob patterns supported) to exclude from scan (note that these are in addition to the excluded paths provided in the config file) (default: .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg) + # excluded_paths: # optional, default is DEFAULT + # comma-separated list of test IDs to skip + # skips: # optional, default is DEFAULT + # path to a .bandit file that supplies command line arguments + # ini_path: # optional, default is DEFAULT + diff --git a/.github/workflows/bearer.yml b/.github/workflows/bearer.yml new file mode 100644 index 00000000..a18c9332 --- /dev/null +++ b/.github/workflows/bearer.yml @@ -0,0 +1,43 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# +# This workflow file requires a free account on Bearer.com to manage findings, notifications and more. 
+# See https://docs.bearer.com/guides/bearer-cloud/ +name: Bearer + +on: + push: + branches: ["master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: ["master"] + schedule: + - cron: '22 2 * * 0' + +permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + +jobs: + bearer: + runs-on: ubuntu-latest + steps: + # Checkout project source + - uses: actions/checkout@v4 + # Scan code using Bearer CLI + - name: Run Report + id: report + uses: bearer/bearer-action@828eeb928ce2f4a7ca5ed57fb8b59508cb8c79bc + with: + api-key: ${{ secrets.BEARER_TOKEN }} + format: sarif + output: results.sarif + exit-code: 0 + # Upload SARIF file generated in previous step + - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif diff --git a/.github/workflows/codacy.yml b/.github/workflows/codacy.yml new file mode 100644 index 00000000..6bd05e25 --- /dev/null +++ b/.github/workflows/codacy.yml @@ -0,0 +1,61 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow checks out code, performs a Codacy security scan +# and integrates the results with the +# GitHub Advanced Security code scanning feature. For more information on +# the Codacy security scan action usage and parameters, see +# https://github.com/codacy/codacy-analysis-cli-action. +# For more information on Codacy Analysis CLI in general, see +# https://github.com/codacy/codacy-analysis-cli. 
+ +name: Codacy Security Scan + +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + schedule: + - cron: '37 4 * * 0' + +permissions: + contents: read + +jobs: + codacy-security-scan: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: Codacy Security Scan + runs-on: ubuntu-latest + steps: + # Checkout the repository to the GitHub Actions runner + - name: Checkout code + uses: actions/checkout@v4 + + # Execute Codacy Analysis CLI and generate a SARIF output with the security issues identified during the analysis + - name: Run Codacy Analysis CLI + uses: codacy/codacy-analysis-cli-action@97bf5df3c09e75f5bcd72695998f96ebd701846e + with: + # Check https://github.com/codacy/codacy-analysis-cli#project-token to get your project token from your Codacy repository + # You can also omit the token and run the tools that support default configurations + project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} + verbose: true + output: results.sarif + format: sarif + # Adjust severity of non-security issues + gh-code-scanning-compat: true + # Force 0 exit code to allow SARIF file generation + # This will handover control about PR rejection to the GitHub side + max-allowed-issues: 2147483647 + + # Upload the SARIF file generated in the previous step + - name: Upload SARIF results file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..6ddde5c5 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,81 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '38 20 * * 4' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. 
+ runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + # âšī¸ Command-line programs to run using the OS shell. + # đ See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/crda.yml b/.github/workflows/crda.yml new file mode 100644 index 00000000..e48aea48 --- /dev/null +++ b/.github/workflows/crda.yml @@ -0,0 +1,126 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow performs a static analysis of your source code using +# Red Hat CodeReady Dependency Analytics. + +# Scans are triggered: +# 1. On every push to default and protected branches +# 2. On every Pull Request targeting the default branch +# 3. On a weekly schedule +# 4. Manually, on demand, via the "workflow_dispatch" event + +# đ The CRDA Starter workflow will: +# - Checkout your repository +# - Setup the required tool stack +# - Install the CRDA command line tool +# - Auto detect the manifest file and install the project's dependencies +# - Perform the security scan using CRDA +# - Upload the SARIF result to the GitHub Code Scanning which can be viewed under the security tab +# - Optionally upload the SARIF file as an artifact for the future reference + +# âšī¸ Configure your repository and the workflow with the following steps: +# 1. 
Setup the tool stack based on the project's requirement. +# Refer to: https://github.com/redhat-actions/crda/#1-set-up-the-tool-stack +# 2. (Optional) CRDA action attempt to detect the language and install the +# required dependencies for your project. If your project doesn't aligns +# with the default dependency installation command mentioned here +# https://github.com/redhat-actions/crda/#3-installing-dependencies. +# Use the required inputs to setup the same +# 3. (Optional) CRDA action attempts to detect the manifest file if it is +# present in the root of the project and named as per the default mentioned +# here https://github.com/redhat-actions/crda/#3-installing-dependencies. +# If it deviates from the default, use the required inputs to setup the same +# 4. Setup Authentication - Create the CRDA_KEY or SNYK_TOKEN. +# Refer to: https://github.com/redhat-actions/crda/#4-set-up-authentication +# 5. (Optional) Upload SARIF file as an Artifact to download and view +# 6. Commit and push the workflow file to your default branch to trigger a workflow run. + +# đ Visit our GitHub organization at https://github.com/redhat-actions/ to see our actions and provide feedback. + +name: CRDA Scan + +# Controls when the workflow will run +on: + # TODO: Customize trigger events based on your DevSecOps processes + # + # This workflow is made to run with OpenShift starter workflow + # https://github.com/actions/starter-workflows/blob/main/deployments/openshift.yml + # However, if you want to run this workflow as a standalone workflow, please + # uncomment the 'push' trigger below and configure it based on your requirements. + # + workflow_call: + secrets: + CRDA_KEY: + required: false + SNYK_TOKEN: + required: false + workflow_dispatch: + + # push: + # branches: [ "master" ] + + # pull_request_target is used to securely share secret to the PR's workflow run. + # For more info visit: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target + pull_request_target: + branches: [ "master" ] + types: [ assigned, opened, synchronize, reopened, labeled, edited ] + +permissions: + contents: read + +jobs: + crda-scan: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for redhat-actions/crda to upload SARIF results + name: Scan project vulnerabilities with CRDA + runs-on: ubuntu-20.04 + steps: + + - name: Check out repository + uses: actions/checkout@v4 + + # ******************************************************************* + # Required: Instructions to setup project + # 1. Setup Go, Java, Node.js or Python depending on your project type + # 2. 
Setup Actions are listed below, choose one from them: + # - Go: https://github.com/actions/setup-go + # - Java: https://github.com/actions/setup-java + # - Node.js: https://github.com/actions/setup-node + # - Python: https://github.com/actions/setup-python + # + # Example: + # - name: Setup Node + # uses: actions/setup-node@v2 + # with: + # node-version: '14' + + # https://github.com/redhat-actions/openshift-tools-installer/blob/main/README.md + - name: Install CRDA CLI + uses: redhat-actions/openshift-tools-installer@v1 + with: + source: github + github_pat: ${{ github.token }} + # Choose the desired version of the CRDA CLI + crda: "latest" + + ###################################################################################### + # https://github.com/redhat-actions/crda/blob/main/README.md + # + # By default, CRDA will detect the manifest file and install the required dependencies + # using the standard command for the project type. + # If your project doesn't aligns with the defaults mentioned in this action, you will + # need to set few inputs that are described here: + # https://github.com/redhat-actions/crda/blob/main/README.md#3-installing-dependencies + # Visit https://github.com/redhat-actions/crda/#4-set-up-authentication to understand + # process to get a SNYK_TOKEN or a CRDA_KEY + - name: CRDA Scan + id: scan + uses: redhat-actions/crda@v1 + with: + crda_key: ${{ secrets.CRDA_KEY }} # Either use crda_key or snyk_token + # snyk_token: ${{ secrets.SNYK_TOKEN }} + # upload_artifact: false # Set this to false to skip artifact upload diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml new file mode 100644 index 00000000..0d4a0136 --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,20 @@ +# Dependency Review Action +# +# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging. 
+# +# Source repository: https://github.com/actions/dependency-review-action +# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement +name: 'Dependency Review' +on: [pull_request] + +permissions: + contents: read + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout Repository' + uses: actions/checkout@v4 + - name: 'Dependency Review' + uses: actions/dependency-review-action@v4 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0f89cb4c..a69556bd 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,9 +11,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: - python-version: 3.x - - run: pip install mkdocs-material - - run: pip install "mkdocstrings[python]" - - run: mkdocs gh-deploy --force \ No newline at end of file + python-version: '3.10' + - run: pip install --no-cache-dir mkdocs-material + - run: pip install --no-cache-dir "mkdocstrings[python]" + - run: pip install --no-cache-dir mkdocs-glightbox + - run: mkdocs gh-deploy --force diff --git a/.github/workflows/generator-generic-ossf-slsa3-publish.yml b/.github/workflows/generator-generic-ossf-slsa3-publish.yml new file mode 100644 index 00000000..35de4f7c --- /dev/null +++ b/.github/workflows/generator-generic-ossf-slsa3-publish.yml @@ -0,0 +1,66 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow lets you generate SLSA provenance file for your project. +# The generation satisfies level 3 for the provenance requirements - see https://slsa.dev/spec/v0.1/requirements +# The project is an initiative of the OpenSSF (openssf.org) and is developed at +# https://github.com/slsa-framework/slsa-github-generator. +# The provenance file can be verified using https://github.com/slsa-framework/slsa-verifier. +# For more information about SLSA and how it improves the supply-chain, visit slsa.dev. + +name: SLSA generic generator +on: + workflow_dispatch: + release: + types: [created] + +jobs: + build: + runs-on: ubuntu-latest + outputs: + digests: ${{ steps.hash.outputs.digests }} + + steps: + - uses: actions/checkout@v4 + + # ======================================================== + # + # Step 1: Build your artifacts. + # + # ======================================================== + - name: Build artifacts + run: | + # These are some amazing artifacts. + echo "artifact1" > artifact1 + echo "artifact2" > artifact2 + + # ======================================================== + # + # Step 2: Add a step to generate the provenance subjects + # as shown below. Update the sha256 sum arguments + # to include all binaries that you generate + # provenance for. + # + # ======================================================== + - name: Generate subject for provenance + id: hash + run: | + set -euo pipefail + + # List the artifacts the provenance will refer to. + files=$(ls artifact*) + # Generate the subjects (base64 encoded). + echo "hashes=$(sha256sum $files | base64 -w0)" >> "${GITHUB_OUTPUT}" + + provenance: + needs: [build] + permissions: + actions: read # To read the workflow path. + id-token: write # To sign the provenance. + contents: write # To add assets to a release. 
+ uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0 + with: + base64-subjects: "${{ needs.build.outputs.digests }}" + upload-assets: true # Optional: Upload to a new release diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml index 46135690..d23c4d40 100644 --- a/.github/workflows/label.yml +++ b/.github/workflows/label.yml @@ -17,6 +17,6 @@ jobs: pull-requests: write steps: - - uses: actions/labeler@v4 + - uses: actions/labeler@v5 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 197e3dbf..fb8f5879 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -14,14 +14,14 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: [3.10] steps: - name: đī¸ Checkout uses: actions/checkout@v4 with: ref: ${{ github.head_ref }} - name: đ Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index c73e032c..f334972b 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,16 +7,16 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --no-cache-dir --upgrade pip pip install pylint - name: Analysing the code with pylint run: | diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml new file mode 100644 index 00000000..53aca44d --- /dev/null +++ b/.github/workflows/pyre.yml @@ -0,0 +1,46 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow integrates Pyre with GitHub's +# Code Scanning feature. +# +# Pyre is a performant type checker for Python compliant with +# PEP 484. Pyre can analyze codebases with millions of lines +# of code incrementally â providing instantaneous feedback +# to developers as they write code. +# +# See https://pyre-check.org + +name: Pyre + +on: + workflow_dispatch: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +permissions: + contents: read + +jobs: + pyre: + permissions: + actions: read + contents: read + security-events: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Run Pyre + uses: facebook/pyre-action@12b8d923443ea66cb657facc2e5faac1c8c86e64 + with: + # To customize these inputs: + # See https://github.com/facebook/pyre-action#inputs + repo-directory: './' + requirements-path: 'requirements.txt' diff --git a/.github/workflows/pysa.yml b/.github/workflows/pysa.yml new file mode 100644 index 00000000..c420e3cb --- /dev/null +++ b/.github/workflows/pysa.yml @@ -0,0 +1,50 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +# This workflow integrates Python Static Analyzer (Pysa) with +# GitHub's Code Scanning feature. +# +# Python Static Analyzer (Pysa) is a security-focused static +# analysis tool that tracks flows of data from where they +# originate to where they terminate in a dangerous location. +# +# See https://pyre-check.org/docs/pysa-basics/ + +name: Pysa + +on: + workflow_dispatch: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '42 23 * * 1' + +permissions: + contents: read + +jobs: + pysa: + permissions: + actions: read + contents: read + security-events: write + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Run Pysa + uses: facebook/pysa-action@f46a63777e59268613bd6e2ff4e29f144ca9e88b + with: + # To customize these inputs: + # See https://github.com/facebook/pysa-action#inputs + repo-directory: './' + requirements-path: 'requirements.txt' + infer-types: true + include-default-sapp-filters: true diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 00000000..7d4d3f9e --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python application + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest torchfix + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml new file mode 100644 index 00000000..b1c28369 --- /dev/null +++ b/.github/workflows/python-package-conda.yml @@ -0,0 +1,34 @@ +name: Python Package using Conda + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + conda env update --file environment.yml --name base + - name: Lint with flake8 + run: | + conda install flake8 torchfix + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + conda install pytest + pytest diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 00000000..129843da --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --no-cache-dir --upgrade pip + python -m pip install --no-cache-dir flake8 pytest torchfix + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index a55e43ea..4a190eae 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -16,17 +16,17 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: '3.10' - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --no-cache-dir --upgrade pip pip install build - name: Build package run: python -m build - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index dc72e039..3aa6410b 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -18,7 +18,7 @@ jobs: pull-requests: write steps: - - uses: actions/stale@v8 + - uses: actions/stale@v9 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-issue-message: 'Stale issue message' diff --git a/.github/workflows/super-linter.yml b/.github/workflows/super-linter.yml new file mode 100644 index 00000000..f01abd03 --- /dev/null +++ b/.github/workflows/super-linter.yml @@ -0,0 +1,29 @@ +# This workflow executes several linters on changed files based on languages used in your code base whenever +# you push a code or open a pull request. +# +# You can adjust the behavior by modifying this file. 
+# For more information, see: +# https://github.com/github/super-linter +name: Lint Code Base + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] +jobs: + run-lint: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + # Full git history is needed to get a proper list of changed files within `super-linter` + fetch-depth: 0 + + - name: Lint Code Base + uses: github/super-linter@v6 + env: + VALIDATE_ALL_CODEBASE: false + DEFAULT_BRANCH: "master" + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml new file mode 100644 index 00000000..2609d47a --- /dev/null +++ b/.github/workflows/terraform.yml @@ -0,0 +1,93 @@ +# This workflow installs the latest version of Terraform CLI and configures the Terraform CLI configuration file +# with an API token for Terraform Cloud (app.terraform.io). On pull request events, this workflow will run +# `terraform init`, `terraform fmt`, and `terraform plan` (speculative plan via Terraform Cloud). On push events +# to the "master" branch, `terraform apply` will be executed. +# +# Documentation for `hashicorp/setup-terraform` is located here: https://github.com/hashicorp/setup-terraform +# +# To use this workflow, you will need to complete the following setup steps. +# +# 1. Create a `main.tf` file in the root of this repository with the `remote` backend and one or more resources defined. +# Example `main.tf`: +# # The configuration for the `remote` backend. +# terraform { +# backend "remote" { +# # The name of your Terraform Cloud organization. +# organization = "example-organization" +# +# # The name of the Terraform Cloud workspace to store Terraform state files in. +# workspaces { +# name = "example-workspace" +# } +# } +# } +# +# # An example resource that does nothing. +# resource "null_resource" "example" { +# triggers = { +# value = "A example resource that does nothing!" +# } +# } +# +# +# 2. Generate a Terraform Cloud user API token and store it as a GitHub secret (e.g. TF_API_TOKEN) on this repository. +# Documentation: +# - https://www.terraform.io/docs/cloud/users-teams-organizations/api-tokens.html +# - https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets +# +# 3. Reference the GitHub secret in step using the `hashicorp/setup-terraform` GitHub Action. +# Example: +# - name: Setup Terraform +# uses: hashicorp/setup-terraform@v3 +# with: +# cli_config_credentials_token: ${{ secrets.TF_API_TOKEN }} + +name: 'Terraform' + +on: + push: + branches: [ "master" ] + pull_request: + +permissions: + contents: read + +jobs: + terraform: + name: 'Terraform' + runs-on: ubuntu-latest + environment: production + + # Use the Bash shell regardless whether the GitHub Actions runner is ubuntu-latest, macos-latest, or windows-latest + defaults: + run: + shell: bash + + steps: + # Checkout the repository to the GitHub Actions runner + - name: Checkout + uses: actions/checkout@v4 + + # Install the latest version of Terraform CLI and configure the Terraform CLI configuration file with a Terraform Cloud user API token + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + cli_config_credentials_token: ${{ secrets.TF_API_TOKEN }} + + # Initialize a new or existing Terraform working directory by creating initial files, loading any remote state, downloading modules, etc. 
+ - name: Terraform Init + run: terraform init + + # Checks that all Terraform configuration files adhere to a canonical format + - name: Terraform Format + run: terraform fmt -check + + # Generates an execution plan for Terraform + - name: Terraform Plan + run: terraform plan -input=false + + # On push to "master", build or change infrastructure according to Terraform configuration files + # Note: It is recommended to set up a required "strict" status check in your repository for "Terraform Cloud". See the documentation on "strict" required status checks for more information: https://help.github.com/en/github/administering-a-repository/types-of-required-status-checks + - name: Terraform Apply + if: github.ref == 'refs/heads/"master"' && github.event_name == 'push' + run: terraform apply -auto-approve -input=false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 65dc68d9..e2fb311a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,7 +15,6 @@ jobs: strategy: matrix: python-version: - - "3.8" - "3.9" - "3.10" - "3.11" diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 7bb929b8..8fd36915 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -16,30 +16,18 @@ jobs: - uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install dependencies - run: pip install -r requirements.txt + run: pip install --no-cache-dir -r requirements.txt - name: Run Python unit tests - run: python3 -m unittest tests/zeta + run: python3 -m pytest - name: Verify that the Docker image for the action builds run: docker build . --file Dockerfile - - - name: Integration test 1 - uses: ./ - with: - input-one: something - input-two: true - - - name: Integration test 2 - uses: ./ - with: - input-one: something else - input-two: false - + - name: Verify integration test results - run: python3 -m unittest unittesting/zeta + run: python3 -m pytest diff --git a/.github/workflows/welcome.yml b/.github/workflows/welcome.yml index a993236c..51372fe2 100644 --- a/.github/workflows/welcome.yml +++ b/.github/workflows/welcome.yml @@ -10,8 +10,9 @@ jobs: build: name: đ Welcome runs-on: ubuntu-latest + permissions: write-all steps: - - uses: actions/first-interaction@v1.1.1 + - uses: actions/first-interaction@v1.3.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} issue-message: "Hello there, thank you for opening an Issue ! đđģ The team was notified and they will get back to you asap." diff --git a/.gitignore b/.gitignore index 1c21c0cd..d6b048a1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Zeta-specific +experimental_tests.py + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -11,9 +14,12 @@ data # Distribution / packaging .Python build/ +.ruff_cache +.vscode develop-eggs/ dist/ downloads/ +.errors.txt eggs/ .eggs/ lib/ @@ -22,6 +28,7 @@ parts/ sdist/ var/ wheels/ +errors.txt share/python-wheels/ *.egg-info/ .installed.cfg diff --git a/README.md b/README.md index 2ba3d062..6eabf52b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ [![Multi-Modality](images/agorabanner.png)](https://discord.gg/qUtxnK2NMf) ![Zeta banner](images/zeta.png) +Build SOTA AI Models 80% faster with modular, high-performance, and scalable building blocks! [![Docs](https://readthedocs.org/projects/zeta/badge/)](https://zeta.readthedocs.io) @@ -9,23 +10,36 @@
-Build High-performance, agile, and scalable AI models with modular and re-useable building blocks! +[![Join our Discord](https://img.shields.io/badge/Discord-Join%20our%20server-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/agora-999382051935506503) [![Subscribe on YouTube](https://img.shields.io/badge/YouTube-Subscribe-red?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/@kyegomez3242) [![Connect on LinkedIn](https://img.shields.io/badge/LinkedIn-Connect-blue?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/kye-g-38759a207/) [![Follow on X.com](https://img.shields.io/badge/X.com-Follow-1DA1F2?style=for-the-badge&logo=x&logoColor=white)](https://x.com/kyegomezb) -# đ¤ Schedule a 1-on-1 Session -Book a [1-on-1 Session with Kye](https://calendly.com/apacai/agora), the Creator, to discuss any issues, provide feedback, or explore how we can improve Zeta for you. +[![GitHub issues](https://img.shields.io/github/issues/kyegomez/zeta)](https://github.com/kyegomez/zeta/issues) [![GitHub forks](https://img.shields.io/github/forks/kyegomez/zeta)](https://github.com/kyegomez/zeta/network) [![GitHub stars](https://img.shields.io/github/stars/kyegomez/zeta)](https://github.com/kyegomez/zeta/stargazers) [![GitHub license](https://img.shields.io/github/license/kyegomez/zeta)](https://github.com/kyegomez/zeta/blob/main/LICENSE)[![GitHub star chart](https://img.shields.io/github/stars/kyegomez/zeta?style=social)](https://star-history.com/#kyegomez/zeta)[![Dependency Status](https://img.shields.io/librariesio/github/kyegomez/zeta)](https://libraries.io/github/kyegomez/zeta) [![Downloads](https://static.pepy.tech/badge/zeta/month)](https://pepy.tech/project/zeta) + +[![Join the Agora discord](https://img.shields.io/discord/1110910277110743103?label=Discord&logo=discord&logoColor=white&style=plastic&color=d7b023)![Share on Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Share%20%40kyegomez/zeta)](https://twitter.com/intent/tweet?text=Check%20out%20this%20amazing%20AI%20project:%20&url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta) [![Share on Facebook](https://img.shields.io/badge/Share-%20facebook-blue)](https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta) [![Share on LinkedIn](https://img.shields.io/badge/Share-%20linkedin-blue)](https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta&title=&summary=&source=) + +[![Share on Reddit](https://img.shields.io/badge/-Share%20on%20Reddit-orange)](https://www.reddit.com/submit?url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta&title=zeta%20-%20the%20future%20of%20AI) [![Share on Hacker News](https://img.shields.io/badge/-Share%20on%20Hacker%20News-orange)](https://news.ycombinator.com/submitlink?u=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta&t=zeta%20-%20the%20future%20of%20AI) [![Share on Pinterest](https://img.shields.io/badge/-Share%20on%20Pinterest-red)](https://pinterest.com/pin/create/button/?url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta&media=https%3A%2F%2Fexample.com%2Fimage.jpg&description=zeta%20-%20the%20future%20of%20AI) [![Share on WhatsApp](https://img.shields.io/badge/-Share%20on%20WhatsApp-green)](https://api.whatsapp.com/send?text=Check%20out%20zeta%20-%20the%20future%20of%20AI%20%23zeta%20%23AI%0A%0Ahttps%3A%2F%2Fgithub.com%2Fkyegomez%2Fzeta) + +After building out thousands of neural nets and facing the same annoying bottlenecks of chaotic 
codebases with no modularity and low-performance modules, I built Zeta so that I and others can quickly prototype, train, and optimize the latest SOTA neural nets and deploy them into production.
+Zeta places a radical emphasis on usability, modularity, and performance. Zeta is currently used in hundreds of models across my GitHub and beyond.
+Get started below, and let me know if you want my help building any model; I'm here for you đ đ

-## Installation
-`pip install zetascale`
+# Install
+
+```bash
+$ pip3 install -U zetascale
+```
+
+# Usage

-## Initiating Your Journey
+## Starting Your Journey

 Creating a model empowered with the aforementioned breakthrough research features is a breeze. Here's how to quickly materialize the renowned Flash Attention

 ```python
 import torch
-from zeta.nn.attention import FlashAttention
+
+from zeta.nn import FlashAttention

 q = torch.randn(2, 4, 6, 8)
 k = torch.randn(2, 4, 10, 8)
@@ -34,22 +48,532 @@ v = torch.randn(2, 4, 10, 8)
 attention = FlashAttention(causal=False, dropout=0.1, flash=True)
 output = attention(q, k, v)

-print(output.shape)
+print(output.shape)
+```
+
+
+
+### `SwiGLU`
+The SwiGLU activation function takes an input tensor and applies a gating mechanism to selectively pass information: one linear projection of the input is passed through a Swish (SiLU) non-linearity and used to gate a second linear projection, so the network learns which information to let through.
+
+
+```python
+import torch
+
+from zeta.nn import SwiGLUStacked
+
+x = torch.randn(5, 10)
+swiglu = SwiGLUStacked(10, 20)
+swiglu(x).shape
+```
+
+In this example, we first import the necessary modules, including torch for tensor operations and SwiGLUStacked from zeta.nn for the SwiGLU activation function.
+
+We then create a random input tensor x with a shape of (5, 10). Next, we instantiate SwiGLUStacked with an input size of 10 and an output size of 20.
+
+Finally, we pass the input tensor x to the swiglu module, which applies the SwiGLU activation function to it, and inspect the shape of the resulting output tensor.
+
+-------
+
+### RelativePositionBias
+- `RelativePositionBias` quantizes the distance between two positions into a certain number of buckets and then uses an embedding to get the relative position bias. This mechanism aids the attention mechanism by providing biases based on relative positions between the query and key, rather than relying solely on their absolute positions.
+
+```python
+import torch
+from torch import nn
+
+from zeta.nn import RelativePositionBias
+
+# Initialize the RelativePositionBias module
+rel_pos_bias = RelativePositionBias()
+
+# Example 1: Compute bias for a single batch
+bias_matrix = rel_pos_bias(1, 10, 10)
+
+
+# Example 2: Utilize in conjunction with an attention mechanism
+# NOTE: This is a mock example, and may not represent an actual attention mechanism's complete implementation.
+class MockAttention(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.rel_pos_bias = RelativePositionBias()
+
+    def forward(self, queries, keys):
+        bias = self.rel_pos_bias(queries.size(0), queries.size(1), keys.size(1))
+        # Further computations with bias in the attention mechanism...
+ return None # Placeholder + + +# Example 3: Modify default configurations +custom_rel_pos_bias = RelativePositionBias( + bidirectional=False, num_buckets=64, max_distance=256, num_heads=8 +) +``` + +### `FeedForward` +The FeedForward module performs a feedforward operation on the input tensor x. It consists of a multi-layer perceptron (MLP) with an optional activation function and LayerNorm. +Used in most language, multi-modal, and modern neural networks. + +```python +import torch + +from zeta.nn import FeedForward + +model = FeedForward(256, 512, glu=True, post_act_ln=True, dropout=0.2) + +x = torch.randn(1, 256) + +output = model(x) +print(output.shape) +``` + +### `BitLinear` +- The BitLinear module performs linear transformation on the input data, followed by quantization and dequantization. The quantization process is performed using the absmax_quantize function, which quantizes the input tensor based on the absolute maximum value, [from the paper](https://arxiv.org/abs/2310.11453) +```python +import torch +from torch import nn + +import zeta.quant as qt + +class MyModel(nn.Module): + def __init__(self): + super().__init__() + self.linear = qt.BitLinear(10, 20) + + def forward(self, x): + return self.linear(x) + + +# Initialize the model +model = MyModel() + +# Create a random tensor of size (128, 10) +input = torch.randn(128, 10) + +# Perform the forward pass +output = model(input) + +# Print the size of the output +print(output.size()) # torch.Size([128, 20]) +``` + +### `PalmE` +- This is an implementation of the multi-modal Palm-E model using a decoder llm as the backbone with an VIT image encoder to process vision, it's very similiar to GPT4, Kosmos, RTX2, and many other multi-modality model architectures + +```python +import torch + +from zeta.structs import ( + AutoRegressiveWrapper, + Decoder, + Encoder, + Transformer, + ViTransformerWrapper, +) + + +class PalmE(torch.nn.Module): + """ + PalmE is a transformer architecture that uses a ViT encoder and a transformer decoder. + + Args: + + image_size (int): Size of the image. + patch_size (int): Size of the patch. + encoder_dim (int): Dimension of the encoder. + encoder_depth (int): Depth of the encoder. + encoder_heads (int): Number of heads in the encoder. + num_tokens (int): Number of tokens. + max_seq_len (int): Maximum sequence length. + decoder_dim (int): Dimension of the decoder. + decoder_depth (int): Depth of the decoder. + decoder_heads (int): Number of heads in the decoder. + alibi_num_heads (int): Number of heads in the alibi attention. + attn_kv_heads (int): Number of heads in the attention key-value projection. + use_abs_pos_emb (bool): Whether to use absolute positional embeddings. + cross_attend (bool): Whether to cross attend in the decoder. + alibi_pos_bias (bool): Whether to use positional bias in the alibi attention. + rotary_xpos (bool): Whether to use rotary positional embeddings. + attn_flash (bool): Whether to use attention flash. + qk_norm (bool): Whether to normalize the query and key in the attention layer. + + Returns: + + torch.Tensor: The output of the model. 
+ + Usage: + + img = torch.randn(1, 3, 256, 256) + text = torch.randint(0, 20000, (1, 1024)) + model = PalmE() + output = model(img, text) + print(output) + + """ + + def __init__( + self, + image_size=256, + patch_size=32, + encoder_dim=512, + encoder_depth=6, + encoder_heads=8, + num_tokens=20000, + max_seq_len=1024, + decoder_dim=512, + decoder_depth=6, + decoder_heads=8, + alibi_num_heads=4, + attn_kv_heads=2, + use_abs_pos_emb=False, + cross_attend=True, + alibi_pos_bias=True, + rotary_xpos=True, + attn_flash=True, + qk_norm=True, + ): + super().__init__() + + # vit architecture + self.encoder = ViTransformerWrapper( + image_size=image_size, + patch_size=patch_size, + attn_layers=Encoder( + dim=encoder_dim, depth=encoder_depth, heads=encoder_heads + ), + ) + + # palm model architecture + self.decoder = Transformer( + num_tokens=num_tokens, + max_seq_len=max_seq_len, + use_abs_pos_emb=use_abs_pos_emb, + attn_layers=Decoder( + dim=decoder_dim, + depth=decoder_depth, + heads=decoder_heads, + cross_attend=cross_attend, + alibi_pos_bias=alibi_pos_bias, + alibi_num_heads=alibi_num_heads, + rotary_xpos=rotary_xpos, + attn_kv_heads=attn_kv_heads, + attn_flash=attn_flash, + qk_norm=qk_norm, + ), + ) + + # autoregressive wrapper to enable generation of tokens + self.decoder = AutoRegressiveWrapper(self.decoder) + + def forward(self, img: torch.Tensor, text: torch.Tensor): + """Forward pass of the model.""" + try: + encoded = self.encoder(img, return_embeddings=True) + return self.decoder(text, context=encoded) + except Exception as error: + print(f"Failed in forward method: {error}") + raise + + +# Usage with random inputs +img = torch.randn(1, 3, 256, 256) +text = torch.randint(0, 20000, (1, 1024)) + +# Initiliaze the model +model = PalmE() +output = model(img, text) +print(output) ``` + +### `Unet` +Unet is a famous convolutional neural network architecture originally used for biomedical image segmentation but soon became the backbone of the generative AI Mega-revolution. The architecture comprises two primary pathways: downsampling and upsampling, followed by an output convolution. Due to its U-shape, the architecture is named U-Net. Its symmetric architecture ensures that the context (from downsampling) and the localization (from upsampling) are captured effectively. + +```python +import torch + +from zeta.nn import Unet + +# Initialize the U-Net model +model = Unet(n_channels=1, n_classes=2) + +# Random input tensor with dimensions [batch_size, channels, height, width] +x = torch.randn(1, 1, 572, 572) + +# Forward pass through the model +y = model(x) + +# Output +print(f"Input shape: {x.shape}") +print(f"Output shape: {y.shape}") +``` + + +### `VisionEmbeddings` +The VisionEmbedding class is designed for converting images into patch embeddings, making them suitable for processing by transformer-based models. This class plays a crucial role in various computer vision tasks and enables the integration of vision data into transformer architectures! 
+
+```python
+import torch
+
+from zeta.nn import VisionEmbedding
+
+# Create an instance of VisionEmbedding
+vision_embedding = VisionEmbedding(
+    img_size=224,
+    patch_size=16,
+    in_chans=3,
+    embed_dim=768,
+    contain_mask_token=True,
+    prepend_cls_token=True,
+)
+
+# Load an example image (3 channels, 224x224)
+input_image = torch.rand(1, 3, 224, 224)
+
+# Perform image-to-patch embedding
+output = vision_embedding(input_image)
+
+# The output now contains patch embeddings, ready for input to a transformer model
+```
+
+
+### `niva`
+- Niva dynamically quantizes the weights of the layers you specify via `quantize_layers`, which is ideal for models whose runtime activations vary. đī¸ Example layers: nn.Embedding, nn.LSTM.
+
+```python
+import torch
+from torch import nn
+
+from zeta import niva
+
+# Load a pre-trained model (YourModelClass is a placeholder for your own model)
+model = YourModelClass()
+
+# Quantize the model dynamically, specifying layers to quantize
+niva(
+    model=model,
+    model_path="path_to_pretrained_model_weights.pt",
+    output_path="quantized_model.pt",
+    quant_type="dynamic",
+    quantize_layers=[nn.Linear, nn.Conv2d],
+    dtype=torch.qint8,
+)
+```
+
+
+### `FusedDenseGELUDense`
+- Increase model speed by 2x with this module, which fuses two hyper-optimized dense ops from bitsandbytes with a GELU in between!
+
+```python
+import torch
+
+from zeta.nn import FusedDenseGELUDense
+
+x = torch.randn(1, 512)
+model = FusedDenseGELUDense(512, 1024)
+out = model(x)
+out.shape
+```
+
+
+### `FusedDropoutLayerNorm`
+- FusedDropoutLayerNorm fuses dropout and LayerNorm into a single kernel to speed up FFNs or MLPs by 2x
+
+```python
+import torch
+from torch import nn
+
+from zeta.nn import FusedDropoutLayerNorm
+
+# Initialize the module
+model = FusedDropoutLayerNorm(dim=512)
+
+# Create a sample input tensor
+x = torch.randn(1, 512)
+
+# Forward pass
+output = model(x)
+
+# Check output shape
+print(output.shape)  # Expected: torch.Size([1, 512])
+```
+
+
+### `Mamba`
+- PyTorch implementation of the new SSM model architecture Mamba
+
+```python
+import torch
+
+from zeta.nn import MambaBlock
+
+# Initialize Mamba
+block = MambaBlock(dim=64, depth=1)
+
+# Random input
+x = torch.randn(1, 10, 64)
+
+# Apply the model to the block
+y = block(x)
+
+print(y.shape)
+# torch.Size([1, 10, 64])
+```
+
+### `FiLM`
+- FiLM (Feature-wise Linear Modulation) conditions a network's hidden activations on an external input by predicting a per-feature scale and shift.
+
+```python
+import torch
+
+from zeta.nn import Film
+
+# Initialize the Film layer
+film_layer = Film(dim=128, hidden_dim=64, expanse_ratio=4)
+
+# Create some dummy data for conditions and hiddens
+conditions = torch.randn(10, 128)  # Batch size is 10, feature size is 128
+hiddens = torch.randn(
+    10, 1, 128
+)  # Batch size is 10, sequence length is 1, feature size is 128
+
+# Pass the data through the Film layer
+modulated_features = film_layer(conditions, hiddens)
+
+# Print the shape of the output
+print(modulated_features.shape)  # Should be [10, 1, 128]
+```
+
+### `hyper_optimize`
+- A single wrapper for torch.fx, TorchScript, torch.compile, dynamic quantization, and mixed precision through torch.amp, with execution-time metrics all in one place!
+```python
+import torch
+
+from zeta.nn import hyper_optimize
+
+
+@hyper_optimize(
+    torch_fx=False,
+    torch_script=False,
+    torch_compile=True,
+    quantize=True,
+    mixed_precision=True,
+    enable_metrics=True,
+)
+def model(x):
+    return x @ x
+
+
+out = model(torch.randn(1, 3, 32, 32))
+print(out)
+```
+
+
+### DPO - Direct Policy Optimization
+Direct Policy Optimization, employed in many RLHF applications for LLMs.
+
+```python
+import torch
+from torch import nn
+
+from zeta.rl import DPO
+
+
+# Define a simple policy model
+class PolicyModel(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.fc = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x):
+        return self.fc(x)
+
+
+input_dim = 10
+output_dim = 5
+policy_model = PolicyModel(input_dim, output_dim)
+
+# Initialize DPO with the policy model
+dpo_model = DPO(model=policy_model, beta=0.1)
+
+# Sample preferred and unpreferred sequences
+preferred_seq = torch.randint(0, output_dim, (3, input_dim))
+unpreferred_seq = torch.randint(0, output_dim, (3, input_dim))
+
+# Compute loss
+loss = dpo_model(preferred_seq, unpreferred_seq)
+print(loss)
+```
+
+
 # Documentation
-[Click here for the documentation, it's at zeta.apac.ai](https://zeta.apac.ai)
+All classes and functions must have documentation. If you see a class or function without documentation, please report it to me at kye@apac.ai.
+
+Documentation is available at [zeta.apac.ai](https://zeta.apac.ai/)
+
+
+-------
+
+
+# Running tests
+You should install the pre-commit hooks with `pre-commit install`. This will run the linter, mypy, and a subset of the tests on every commit.
+
+For more examples of how to run the full test suite, please refer to the CI workflow.
+
+Some examples of running tests locally:
+
+```bash
+python3 -m pip install -e '.[testing]'  # install extra deps for testing
+python3 -m pytest tests/  # whole test suite
+```
+----
+
+## Community
+
+Join our growing community around the world for real-time support, ideas, and discussions on how to build better models.
 
-# Vision
-Zeta hopes to be the leading framework and library to effortlessly enable you to create the most capable and reliable foundation models out there with infinite scalability in as minmal amounts of code as possible
+- View our official [Docs](https://zeta.apac.ai)
+- Chat live with us on [Discord](https://discord.gg/kS3rwKs3ZC)
+- Follow us on [Twitter](https://twitter.com/kyegomez)
+- Connect with us on [LinkedIn](https://www.linkedin.com/company/the-swarm-corporation)
+- Visit us on [YouTube](https://www.youtube.com/channel/UC9yXyitkbU_WSy7bd_41SqQ)
+- [Join the Swarms community on Discord!](https://discord.gg/AJazBmhKnr)
+---
 
-## Contributing
-We're dependent on you for contributions, it's only Kye maintaining this repository and it's very difficult and with that said any contribution is infinitely appreciated by not just me but by Zeta's users who dependen on this repository to build the world's
-best AI models
+
+# Schedule a 1-on-1 Session
+Want to train a custom AI model for a real-world task like general multi-modal models, facial recognition, drug discovery, or humanoid robotics? I'll help you design the model architecture, train the model, and then optimize it to meet your quality assurance standards.
+
+Book a [1-on-1 session with Kye](https://calendly.com/apacai/agora), the creator of Zeta, to discuss any issues, provide feedback, explore how we can improve Zeta for you, or get help building your own custom models!
+
+## Contributions
+
+The easiest way to contribute is to pick any issue with the `good first issue` tag. Read the Contributing guidelines [here](/CONTRIBUTING.md). Bug Report? [File here](https://github.com/kyegomez/zeta/issues/new/choose) | Feature Request? [File here](https://github.com/kyegomez/zeta/issues/new/choose)
+
+Zeta is an open-source project, and contributions are VERY welcome.
If you want to contribute, you can create new features, fix bugs, or improve the infrastructure. Please refer to the [CONTRIBUTING.md](https://github.com/kyegomez/zeta/blob/master/CONTRIBUTING.md) and our [contributing board](https://github.com/users/kyegomez/projects/1) to participate in Roadmap discussions! + + + + -* Head over to the project board to look at open features to implement or bugs to tackle +---- -## Project Board -[This weeks iteration is here](https://github.com/users/kyegomez/projects/7/views/2) +## Accelerate Backlog +Help us accelerate our backlog by supporting us financially! Note, we're an open source corporation and so all the revenue we generate is through donations at the moment ;) + + + + +# License +- Apache + + +# Citation +```bibtex +@misc{zetascale, + title = {Zetascale Framework}, + author = {Kye Gomez}, + year = {2024}, + howpublished = {\url{https://github.com/kyegomez/zeta}}, +} +``` diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index ae895dff..00000000 Binary files a/docs/.DS_Store and /dev/null differ diff --git a/docs/applications/customer_support.md b/docs/applications/customer_support.md deleted file mode 100644 index a5a62f70..00000000 --- a/docs/applications/customer_support.md +++ /dev/null @@ -1,42 +0,0 @@ -## **Applications of Zeta: Revolutionizing Customer Support** - ---- - -**Introduction**: -In today's fast-paced digital world, responsive and efficient customer support is a linchpin for business success. The introduction of AI-driven zeta in the customer support domain can transform the way businesses interact with and assist their customers. By leveraging the combined power of multiple AI agents working in concert, businesses can achieve unprecedented levels of efficiency, customer satisfaction, and operational cost savings. - ---- - -### **The Benefits of Using Zeta for Customer Support:** - -1. **24/7 Availability**: Zeta never sleep. Customers receive instantaneous support at any hour, ensuring constant satisfaction and loyalty. - -2. **Infinite Scalability**: Whether it's ten inquiries or ten thousand, zeta can handle fluctuating volumes with ease, eliminating the need for vast human teams and minimizing response times. - -3. **Adaptive Intelligence**: Zeta learn collectively, meaning that a solution found for one customer can be instantly applied to benefit all. This leads to constantly improving support experiences, evolving with every interaction. - ---- - -### **Features - Reinventing Customer Support**: - -- **AI Inbox Monitor**: Continuously scans email inboxes, identifying and categorizing support requests for swift responses. - -- **Intelligent Debugging**: Proactively helps customers by diagnosing and troubleshooting underlying issues. - -- **Automated Refunds & Coupons**: Seamless integration with payment systems like Stripe allows for instant issuance of refunds or coupons if a problem remains unresolved. - -- **Full System Integration**: Holistically connects with CRM, email systems, and payment portals, ensuring a cohesive and unified support experience. - -- **Conversational Excellence**: With advanced LLMs (Language Model Transformers), the swarm agents can engage in natural, human-like conversations, enhancing customer comfort and trust. - -- **Rule-based Operation**: By working with rule engines, zeta ensure that all actions adhere to company guidelines, ensuring consistent, error-free support. 
- -- **Turing Test Ready**: Crafted to meet and exceed the Turing Test standards, ensuring that every customer interaction feels genuine and personal. - ---- - -**Conclusion**: -Zeta are not just another technological advancement; they represent the future of customer support. Their ability to provide round-the-clock, scalable, and continuously improving support can redefine customer experience standards. By adopting zeta, businesses can stay ahead of the curve, ensuring unparalleled customer loyalty and satisfaction. - -**Experience the future of customer support. Dive into the swarm revolution.** - diff --git a/docs/applications/marketing_agencies.md b/docs/applications/marketing_agencies.md deleted file mode 100644 index f38614bc..00000000 --- a/docs/applications/marketing_agencies.md +++ /dev/null @@ -1,64 +0,0 @@ -## **Zeta in Marketing Agencies: A New Era of Automated Media Strategy** - ---- - -### **Introduction**: -- Brief background on marketing agencies and their role in driving brand narratives and sales. -- Current challenges and pain points faced in media planning, placements, and budgeting. -- Introduction to the transformative potential of zeta in reshaping the marketing industry. - ---- - -### **1. Fundamental Problem: Media Plan Creation**: - - **Definition**: The challenge of creating an effective media plan that resonates with a target audience and aligns with brand objectives. - - - **Traditional Solutions and Their Shortcomings**: Manual brainstorming sessions, over-reliance on past strategies, and long turnaround times leading to inefficiency. - - - **How Zeta Address This Problem**: - - **Benefit 1**: Automated Media Plan Generation â Zeta ingest branding summaries, objectives, and marketing strategies to generate media plans, eliminating guesswork and human error. - - **Real-world Application of Zeta**: The automation of media plans based on client briefs, including platform selections, audience targeting, and creative versions. - ---- - -### **2. Fundamental Problem: Media Placements**: - - **Definition**: The tedious task of determining where ads will be placed, considering demographics, platform specifics, and more. - - - **Traditional Solutions and Their Shortcomings**: Manual placement leading to possible misalignment with target audiences and brand objectives. - - - **How Zeta Address This Problem**: - - **Benefit 2**: Precision Media Placements â Zeta analyze audience data and demographics to suggest the best placements, optimizing for conversions and brand reach. - - **Real-world Application of Zeta**: Automated selection of ad placements across platforms like Facebook, Google, and DSPs based on media plans. - ---- - -### **3. Fundamental Problem: Budgeting**: - - **Definition**: Efficiently allocating and managing advertising budgets across multiple campaigns, platforms, and timeframes. - - - **Traditional Solutions and Their Shortcomings**: Manual budgeting using tools like Excel, prone to errors, and inefficient shifts in allocations. - - - **How Zeta Address This Problem**: - - **Benefit 3**: Intelligent Media Budgeting â Zeta enable dynamic budget allocation based on performance analytics, maximizing ROI. - - **Real-world Application of Zeta**: Real-time adjustments in budget allocations based on campaign performance, eliminating long waiting periods and manual recalculations. - ---- - -### **Features**: -1. Automated Media Plan Generator: Input your objectives and receive a comprehensive media plan. -2. 
Precision Media Placement Tool: Ensure your ads appear in the right places to the right people. -3. Dynamic Budget Allocation: Maximize ROI with real-time budget adjustments. -4. Integration with Common Tools: Seamless integration with tools like Excel and APIs for exporting placements. -5. Conversational Platform: A suite of tools built for modern marketing agencies, bringing all tasks under one umbrella. - ---- - -### **Testimonials**: -- "Zeta have completely revolutionized our media planning process. What used to take weeks now takes mere hours." - *Senior Media Strategist, Top-tier Marketing Agency* -- "The precision with which we can place ads now is unprecedented. It's like having a crystal ball for marketing!" - *Campaign Manager, Global Advertising Firm* - ---- - -### **Conclusion**: -- Reiterate the immense potential of zeta in revolutionizing media planning, placements, and budgeting for marketing agencies. -- Call to action: For marketing agencies looking to step into the future and leave manual inefficiencies behind, zeta are the answer. - ---- \ No newline at end of file diff --git a/docs/blog/introduction_to_zeta.md b/docs/blog/introduction_to_zeta.md new file mode 100644 index 00000000..cba56aff --- /dev/null +++ b/docs/blog/introduction_to_zeta.md @@ -0,0 +1,438 @@ +# Revolutionizing AI/ML with Zeta: The Quest for Truly Modular and Reusable Frameworks + +In the ever-evolving world of Artificial Intelligence and Machine Learning (AI/ML), researchers and engineers constantly seek more efficient and versatile tools to fuel their innovations. One persistent challenge is the lack of truly modular and reusable ML frameworks. This blog dives into the heart of this issue and introduces Zeta, a promising framework aiming to reshape the landscape of AI/ML development. + +## The Current State of AI/ML Development + +In the current AI/ML landscape, development often feels like navigating a maze without a map. Popular frameworks like PyTorch, TensorFlow, and Xformers are powerful but monolithic, making it challenging to swap components or experiment with cutting-edge modules. This lack of modularity results in a monumentally slow and cumbersome development process that hampers progress for researchers and engineers. + +### The Problems with Existing Frameworks + +Before we delve into the world of Zeta, let's take a closer look at the issues plaguing existing AI/ML frameworkss + +And, to provide a comprehensive understanding, let's analyze some of the most widely used frameworks, including PyTorch, TensorFlow, and Xformers. + +### PyTorch + +PyTorch, known for its dynamic computation graph, has gained immense popularity among researchers and developers. However, it too faces challenges in terms of modularity and reusability. + +| Problem | Description | +|---------------------------|----------------------------------------------------------------------------------------------------------| +| Monolithic Design | PyTorch follows a monolithic design, where most components are tightly integrated, limiting flexibility. | +| Lack of Standardization | The absence of standardized module interfaces makes it challenging to swap or extend components. | +| Limited Documentation | While PyTorch has a growing community, documentation gaps and inconsistencies hinder ease of use. | +| Versioning Complexity | Transitioning between PyTorch versions can be complex, causing compatibility issues for projects. 
| + +### TensorFlow + +TensorFlow, with its static computation graph, has been a cornerstone of AI/ML development. However, it too faces its share of challenges. + +| Problem | Description | +|---------------------------|----------------------------------------------------------------------------------------------------------| +| Rigidity in Graph | TensorFlow's static graph can be inflexible, especially when experimenting with different architectures. | +| Boilerplate Code | Developing models in TensorFlow often requires writing extensive boilerplate code, leading to clutter. | +| Deployment Complexity | TensorFlow models can be challenging to deploy due to their heavyweight nature and dependencies. | +| GPU Memory Management | Memory management for GPUs can be challenging, leading to out-of-memory errors during training. | + +### Xformers + +Xformers is a newer entrant, specifically designed for transformer-based models. While it brings innovations, it's not without its issues. + +| Problem | Description | +|---------------------------|----------------------------------------------------------------------------------------------------------| +| Limited Ecosystem | Xformers, being relatively new, has a smaller ecosystem compared to PyTorch and TensorFlow. | +| Lack of Pretrained Models| The availability of pretrained models and libraries for common tasks is limited compared to other frameworks. | +| Community Support | The community support for Xformers is growing but may not match the scale of PyTorch and TensorFlow. | +| Integration Challenges | Integrating Xformers with other components can be challenging due to its specialized nature. | + + +#### Lack of Modularity + +Traditional frameworks are designed as monolithic entities, where every component is tightly integrated. While this approach has its advantages, it severely limits modularity. Researchers and engineers cannot easily swap out components or experiment with new ones without diving deep into the framework's source code. This lack of modularity slows down innovation and collaboration. + +#### Complexity + +Existing frameworks are feature-rich, but this often results in excessive complexity. Beginners and even experienced developers can find themselves overwhelmed by the sheer number of options, configurations, and APIs. This complexity can lead to errors, increased development time, and a steep learning curve. + +#### Limited Standardization + +AI/ML is a rapidly evolving field, with new research and techniques emerging regularly. Existing frameworks struggle to keep pace with these advancements, leading to limited support for new modules and models. This lack of standardization makes it challenging for researchers to implement and share their cutting-edge work. + +#### Reliability and Documentation + +Reliability is a critical aspect of any development framework. However, many existing frameworks suffer from stability issues, making it challenging to deploy models in production. Additionally, documentation can be sparse or outdated, making it difficult for developers to understand and use the framework effectively. + +## The Vision of Modular and Reusable ML Frameworks + +Imagine a world where AI/ML development is as effortless as snapping together Lego blocks. In this vision, researchers and engineers can quickly experiment with the latest modules, combine them like building blocks, and create extremely powerful AI models. 
This modular approach not only accelerates development but also promotes collaboration and knowledge sharing. + +## The Journey Towards Modular and Reusable ML Frameworks + +The journey towards modular and reusable ML frameworks has been fraught with challenges such as lack of reliability, documentation, and a plethora of vast arrays of issues. Researchers and engineers have been searching for a solution, but progress has been slow. Let's examine some of the key challenges: + +### Lack of Reliability + +Reliability is paramount in AI/ML development. Existing frameworks may have stability issues that lead to unexpected crashes or incorrect results. Researchers and engineers need tools they can rely on to conduct experiments and deploy models with confidence. + +### Documentation Woes + +Comprehensive and up-to-date documentation is essential for any framework. It provides developers with the information they need to understand the framework's capabilities and use it effectively. Inadequate documentation can lead to frustration and hinder the adoption of a framework. + +### Compatibility and Integration + +The AI/ML ecosystem is vast, with various libraries and tools available. Frameworks need to be compatible with other tools and libraries to facilitate seamless integration. Incompatibility issues can create roadblocks for developers trying to incorporate new modules or techniques into their workflows. + +### Steep Learning Curve + +The complexity of existing frameworks often results in a steep learning curve for newcomers. Developers must invest significant time and effort in mastering the intricacies of these frameworks, slowing down their ability to contribute meaningfully to AI/ML research. + +### Lack of Modularity + +As mentioned earlier, the lack of modularity in existing frameworks hinders experimentation and innovation. Researchers often resort to implementing custom solutions or working within the constraints of the framework, limiting their ability to explore new ideas. + +## Introducing Zeta: The Future of AI/ML Development + +And now, allow me to introduce Zeta to you, a game-changing AI/ML framework designed with modularity and reusability at its core. Zeta's design principles include fluid experimentation, production-grade reliability, and modularity. Getting started with Zeta is as simple as running `pip install zetascale`. This one-liner sets you on a journey to a new era of AI/ML developmentâa seamless voyaging experience that allows you to set sail across the vast seas of tensors and latent spaces! + +Let's explore Zeta's key features and how it addresses the challenges posed by existing frameworks: + +### Zeta's Key Features + +Zeta is more than just a framework; it's a vision for the future of AI/ML development. Here are some of its key features: + +#### Fluid Experimentation + +Zeta makes it effortless for researchers and industrial AI engineers to rapidly experiment with the latest modules and components. Whether you're interested in MultiGroupedQueryAttention or Unet, Zeta provides the building blocks for your AI experiments. + +#### Production-Grade Reliability + +Reliability is at the core of Zeta's design. It aims to facilitate reproducibility while delivering bleeding-edge performance. This reliability ensures that your AI models can transition seamlessly from research to production. + +#### Modularity + +Zeta's modularized Lego building blocks empower you to build and deploy the best ML models. 
You can mix and match components, experiment with new modules, and create custom solutions with ease. Modularity is the key to unlocking innovation.
+
+### Exploring Zeta in Action
+
+Let's dive into Zeta's capabilities with practical examples and explore how it empowers AI/ML development:
+
+#### Installation
+
+Getting started with Zeta is as simple as running a single command:
+
+```shell
+pip install zetascale
+```
+
+With Zeta, you can kickstart your AI/ML journey within minutes.
+
+#### Initiating Your Journey with FlashAttention
+
+To demonstrate the power of Zeta, let's take a closer look at its `FlashAttention` module:
+
+```python
+import torch
+
+from zeta.nn.attention import FlashAttention
+
+q = torch.randn(2, 4, 6, 8)
+k = torch.randn(2, 4, 10, 8)
+v = torch.randn(2, 4, 10, 8)
+
+attention = FlashAttention(causal=False, dropout=0.1, flash=True)
+output = attention(q, k, v)
+
+print(output.shape)
+```
+
+The `FlashAttention` module empowers your models with cutting-edge attention mechanisms effortlessly.
+
+#### Enhancing Attention with RelativePositionBias
+
+Zeta's `RelativePositionBias` quantizes the distance between positions and provides biases based on relative positions. This mechanism enhances the attention mechanism by considering relative positions between the query and key, rather than relying solely on their absolute positions:
+
+```python
+import torch
+from torch import nn
+
+from zeta.nn import RelativePositionBias
+
+rel_pos_bias = RelativePositionBias()
+
+# Example 1: Compute bias for a single batch
+bias_matrix = rel_pos_bias(1, 10, 10)
+
+
+# Example 2: Integrate with an attention mechanism
+class MockAttention(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.rel_pos_bias = RelativePositionBias()
+
+    def forward(self, queries, keys):
+        bias = self.rel_pos_bias(queries.size(0), queries.size(1), keys.size(1))
+        # Further computations with bias in the attention mechanism...
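+        # A minimal sketch of how the bias could be used (assuming it broadcasts
+        # to the attention-score shape [batch, heads, query_len, key_len]):
+        #   scores = queries @ keys.transpose(-2, -1) / keys.size(-1) ** 0.5
+        #   scores = scores + bias
+        #   attn_weights = scores.softmax(dim=-1)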
+ return None # Placeholder +``` + +#### Streamlining FeedForward Operations with FeedForward + +Zeta's `FeedForward` module simplifies feedforward operations in neural networks: + +```python +from zeta.nn import FeedForward + +model = FeedForward(256, 512, glu=True, post_act_ln=True, dropout=0.2) + +x = torch.randn(1, 256) + +output = model(x) +print(output.shape) +``` + +#### Achieving Linear Transformation with BitLinear + +Zeta's `BitLinear` module combines linear transformation with quantization and dequantization: + +```python +import torch +from torch import nn + +import zeta.quant as qt + + +class MyModel(nn.Module): + def __init__(self): + super().__init__() + self.linear = qt.BitLinear(10, 20) + + def forward(self, x): + return self.linear(x) + + +model = MyModel() + +input = torch.randn(128, 10) + +output = model(input) + +print(output.size()) +``` + +#### Multi-Modal Capabilities with PalmE + +Zeta's `PalmE` is a multi-modal transformer architecture that opens new possibilities in AI/ML: + +```python +import torch + +from zeta.structs import ( + AutoRegressiveWrapper, + Decoder, + Encoder, + Transformer, + ViTransformerWrapper, +) + +# Usage with random inputs +img = torch.randn(1, 3, 256, 256) +text = torch.randint(0, 20000, (1, 1024)) + +model = PalmE() +output = model(img, text) +print(output) +``` + +#### Unleashing U-Net for Image Segmentation + +Zeta's `Unet` brings the power of convolutional neural networks for image segmentation: + +```python +import torch + +from zeta.nn import Unet + +model = Unet(n_channels=1, n_classes=2) + +x = torch.randn(1, 1, 572, 572) + +y = model(x) + +print(f"Input shape: {x.shape}") +print(f"Output shape: {y.shape}") +``` + +#### VisionEmbeddings for Computer Vision + +Zeta's `VisionEmbedding` class transforms images into patch embeddings for transformer-based models: + +```python +import torch + +from zeta.nn import VisionEmbedding + +vision_embedding = VisionEmbedding( + img_size=224, + patch_size=16, + in_chans=3, + embed_dim=768, + contain_mask_token=True, + prepend_cls_token=True, +) + +input_image = torch.rand(1, 3, 224, 224) + +output = vision_embedding(input_image) +``` + +### A Comparative Analysis of Zeta and Other Frameworks + +To truly appreciate Zeta's impact on AI/ML development, let's conduct a detailed comparative analysis of Zeta and other popular frameworks, including PyTorch, TensorFlow, and Xformers. We'll evaluate these frameworks based on various criteria: + +#### Modularity + +| Framework | Modularity Score (1-5) | Comments | +|--------------|------------------------|---------------------------------------------------| +| Zeta | 5 | Exceptional modularity and flexibility. | +| PyTorch | 3 | Modularity but lacks easy component swapping. | +| TensorFlow | 3 | Modularity but can be complex for beginners. | +| Xformers | 4 | Strong modularity but focused on transformers. | + +#### Complexity + +| Framework | Complexity Score (1-5) | Comments | +|--------------|------------------------|---------------------------------------------------| +| Zeta | 4 | Powerful but user-friendly. | +| PyTorch | 5 | Feature-rich but can be complex. | +| TensorFlow | 4 | Extensive features, moderate complexity. | +| Xformers | 3 | Simplified for transformer-based models. | + +#### Compatibility + +| Framework | Compatibility Score (1-5) | Comments | +|--------------|---------------------------|---------------------------------------------------| +| Zeta | 4 | Compatible but still evolving ecosystem. 
| +| PyTorch | 5 | Broad compatibility with many libraries. | +| TensorFlow | 5 | Extensive compatibility with AI/ML tools. | +| Xformers | 3 | Specialized for transformer-based tasks. | + +#### Documentation + +| Framework | Documentation Score (1-5) | Comments | +|--------------|----------------------------|---------------------------------------------------| +| Zeta | 4 | Good documentation but room for expansion. | +| PyTorch | 5 | Extensive and well-maintained documentation. | +| TensorFlow | 4 | Solid documentation but can be overwhelming. | +| Xformers | 3 | Documentation primarily focused on transformers. | + +#### Reliability + +| Framework | Reliability Score (1-5) | Comments | +|--------------|-------------------------|---------------------------------------------------| +| Zeta | 4 | High reliability with room for improvement. | +| PyTorch | 5 | Proven reliability and stability. | +| TensorFlow | 4 | Generally reliable but occasional issues. | +| Xformers | 3 | Reliability may vary for specialized tasks. | + +#### Learning Curve + +| Framework | Learning Curve Score (1-5) | Comments | +|--------------|----------------------------|---------------------------------------------------| +| Zeta | 4 | Moderate learning curve, user-friendly. | +| PyTorch | 3 | Steeper learning curve, especially for beginners. | +| TensorFlow | 3 | Moderate learning curve but can be complex. | +| Xformers | 4 | Moderate learning curve, focused on transformers. | + +### Modularity Index Across Modules + +Zeta's approach to modularity allows researchers and engineers to easily swap and combine modules to create powerful AI models. Let's explore some of Zeta's key modules and how they compare to their counterparts in other frameworks. + +#### FlashAttention vs. Standard Attention Mechanisms + +Zeta introduces `FlashAttention`, a module that empowers models with cutting-edge attention mechanisms effortlessly. Let's compare it to standard attention mechanisms in PyTorch and TensorFlow. + +| Aspect | FlashAttention (Zeta) | Standard Attention (PyTorch/TensorFlow) | +|-----------------------------|----------------------------------------|----------------------------------------| +| Modularity | Easily integrated into Zeta workflows | Often tightly coupled with the framework | +| Cutting-edge Features | Supports the latest attention research | May require custom implementations | +| Code Simplicity | Simplifies code with its module design | May involve complex code structures | +| Documentation | Well-documented for ease of use | Documentation may vary in quality | + +#### RelativePositionBias vs. Positional Embeddings + +Zeta's `RelativePositionBias` quantizes the distance between positions and provides biases based on relative positions. This enhances attention mechanisms. Let's compare it to traditional positional embeddings. + +| Aspect | RelativePositionBias (Zeta) | Positional Embeddings (PyTorch/TensorFlow) | +|-----------------------------|----------------------------------------|--------------------------------------------| +| Enhanced Attention | Improves attention with relative bias | Relies solely on absolute positions | +| Flexibility | Adaptable to various tasks | May require different embeddings for tasks | +| Integration | Seamlessly integrated into Zeta | Integration may require additional code | +| Performance | May lead to more efficient models | Performance may vary depending on usage | + +#### FeedForward vs. 
Standard MLP
+
+Zeta's `FeedForward` module simplifies feedforward operations in neural networks. Let's compare it to the standard multilayer perceptron (MLP) in PyTorch and TensorFlow.
+
+| Aspect | FeedForward (Zeta) | Standard MLP (PyTorch/TensorFlow) |
+|-----------------------------|----------------------------------------|----------------------------------|
+| Integration | Easily integrated into Zeta workflows | May require custom MLP layers |
+| Activation Functions | Supports customizable activation funcs | Requires additional code for custom activations |
+| Code Clarity | Streamlines code with its module design | Code structure can be more complex |
+| Performance | May offer optimized performance | Performance depends on implementation |
+
+#### BitLinear vs. Linear Layers
+
+Zeta's `BitLinear` module combines linear transformation with quantization and dequantization. Let's compare it to standard linear layers in PyTorch and TensorFlow.
+
+| Aspect | BitLinear (Zeta) | Standard Linear Layers (PyTorch/TensorFlow) |
+|-----------------------------|----------------------------------------|---------------------------------------------|
+| Quantization | Utilizes quantization for efficient ops | Linear layers perform full-precision ops |
+| Memory Efficiency | Efficient memory use with quantization | May consume more memory |
+| Training Speed | May speed up training with quantization | Training speed may be affected by ops |
+| Code Integration | Seamlessly integrated into Zeta | Integration may require additional code |
+
+### PalmE: Multi-Modal Transformer
+
+Zeta's `PalmE` is a multi-modal transformer architecture that opens new possibilities in AI/ML. It's worth examining how it stacks up against other transformer-based models.
+
+| Aspect | PalmE (Zeta) | Transformer-based Models (Other Frameworks) |
+|-----------------------------|-------------------------------------|----------------------------------------------|
+| Multi-Modality Support | Designed for multi-modal tasks | May require extensive customization for multi-modal tasks |
+| Attention Mechanism | Incorporates advanced attention mechanisms | Attention mechanisms vary across models |
+| Ease of Use | Simplifies multi-modal model development | Building similar models in other frameworks may be more complex |
+| Performance | Performance may be competitive with state-of-the-art models | Performance depends on specific models and tasks |
+
+### Unet: Image Segmentation
+
+Zeta's `Unet` brings the power of convolutional neural networks (CNNs) for image segmentation. Let's see how it compares to other image segmentation approaches.
+
+| Aspect | Unet (Zeta) | Image Segmentation Models (Other Frameworks) |
+|-----------------------------|-------------------------------------|----------------------------------------------|
+| Architecture | Follows the U-Net architecture | Various architectures available for image segmentation |
+| Versatility | Adaptable to different segmentation tasks | May require specific models for different tasks |
+| Code Reusability | Encourages reusing Unet for diverse projects | Code reuse may be limited in some cases |
+| Performance | Performance comparable to traditional models | Performance depends on specific models and datasets |
+
+### VisionEmbeddings: Transformer-Friendly Image Processing
+
+Zeta's `VisionEmbedding` class transforms images into patch embeddings for transformer-based models. Let's evaluate its role compared to traditional image preprocessing.
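+
+To make the contrast concrete, here is a small sketch (the `VisionEmbedding` configuration simply mirrors the example earlier in this post, and the shapes in the comments are illustrative):
+
+```python
+import torch
+import torch.nn.functional as F
+
+from zeta.nn import VisionEmbedding
+
+img = torch.rand(1, 3, 256, 256)
+
+# Traditional preprocessing: the model still consumes a dense pixel grid
+resized = F.interpolate(img, size=(224, 224), mode="bilinear", align_corners=False)
+print(resized.shape)  # torch.Size([1, 3, 224, 224])
+
+# Patch-embedding path: the image becomes a token sequence a transformer can attend over
+to_tokens = VisionEmbedding(
+    img_size=224,
+    patch_size=16,
+    in_chans=3,
+    embed_dim=768,
+    contain_mask_token=True,
+    prepend_cls_token=True,
+)
+tokens = to_tokens(resized)  # roughly (224 / 16) ** 2 = 196 patch tokens of dim 768, plus special tokens
+```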
+ +| Aspect | VisionEmbedding (Zeta) | Traditional Image Preprocessing (Other Frameworks) | +|-----------------------------|-------------------------------------|---------------------------------------------------| +| Integration | Seamlessly integrates with Zeta | Image preprocessing may involve additional steps | +| Compatibility | Tailored for transformer architectures | Preprocessing methods depend on model choice | +| Ease of Use | Simplifies image-to-patch embedding | Image preprocessing may require more effort | +| Performance | Supports efficient transformer-based processing | Performance varies based on preprocessing methods | + +## The Future of AI/ML with Zeta + +Zeta is not just a framework; it's a vision. Led by experts like Kye, the Creator, Zeta's team is committed to revolutionizing AI/ML development. With its unique design and powerful modules, Zeta is poised to reshape the future of AI/ML frameworks. + +## Conclusion + +The journey towards modular and reusable AI/ML frameworks has been long, but Zeta offers a promising path forward. With its modular design, powerful modules, and visionary team, Zeta stands ready to usher in a new era of AI/ML development. Are you ready to embrace the future of AI engineering? Install Zeta now with `pip install zetascale` + +## Documentation + +Explore Zeta further by visiting the [Zeta documentation](zeta.apac.ai) for in-depth information and guidance. diff --git a/docs/architecture.md b/docs/corporate/architecture.md similarity index 100% rename from docs/architecture.md rename to docs/corporate/architecture.md diff --git a/docs/bounties.md b/docs/corporate/bounties.md similarity index 100% rename from docs/bounties.md rename to docs/corporate/bounties.md diff --git a/docs/demos.md b/docs/corporate/demos.md similarity index 100% rename from docs/demos.md rename to docs/corporate/demos.md diff --git a/docs/design.md b/docs/corporate/design.md similarity index 100% rename from docs/design.md rename to docs/corporate/design.md diff --git a/docs/flywheel.md b/docs/corporate/flywheel.md similarity index 100% rename from docs/flywheel.md rename to docs/corporate/flywheel.md diff --git a/docs/corporate/growth.md b/docs/corporate/growth.md new file mode 100644 index 00000000..20eb6e9a --- /dev/null +++ b/docs/corporate/growth.md @@ -0,0 +1,21 @@ +# Growth + +To drive massive user adoption and unleash growth for the Zeta Framework, which is built on open source and distributed via platforms like GitHub and PyPI, a strategic plan involving repeatable activities is essential. These activities should focus on community engagement, continuous improvement, marketing, and partnerships. Here's a table outlining potential repeatable activities that could be key to achieving these goals: + +| Activity | Description | Frequency | Key Objectives | Expected Outcome | +|----------|-------------|-----------|----------------|------------------| +| Community Code Sprints | Organize regular coding events for contributing to the framework. | Bi-monthly | Engage the developer community, encourage contributions. | Increased contributions, enhanced framework features. | +| Webinar Series & Workshops | Host webinars and workshops on using and contributing to Zeta Framework. | Monthly | Educate potential users, showcase framework capabilities. | Higher user adoption, community education. | +| Regular Updates & Patches | Consistent release of updates and patches. | Bi-weekly / Monthly | Maintain a robust, up-to-date framework. 
| Trust and reliance in the frameworkâs utility. | +| Contributor Recognition Program | Implement a program to recognize and reward key contributors. | Quarterly | Motivate contributions, build a loyal community. | Increased community engagement, quality contributions. | +| Social Media Engagement | Active promotion and engagement on platforms like Twitter, LinkedIn, Reddit. | Daily / Weekly | Increase visibility, create buzz. | Greater awareness, attracting new users. | +| Collaboration with Educational Institutions | Partner with universities for curriculum integration and research. | Bi-annually | Promote academic use, foster new talent. | Long-term user base growth, innovation. | +| User Experience Feedback Loops | Regular surveys and feedback sessions with users. | Quarterly | Understand user needs, improve framework. | Enhanced user satisfaction, framework improvement. | +| Blogging & Content Creation | Regular blog posts, tutorials, and use-case studies. | Weekly | Educate and engage with the community. | Higher engagement, SEO benefits. | +| Plugin/Extension Development | Encourage and support the development of plugins/extensions. | As needed | Expand framework capabilities, cater to diverse needs. | Enhanced functionality, broader appeal. | +| Partnership with Industry Leaders | Forge partnerships for co-development or integration. | Annually | Gain credibility, access new markets. | Broader industry acceptance, new user segments. | +| Open Source Conferences | Participate in or sponsor open source conferences. | Annually | Network, showcase framework. | Increased visibility, network expansion. | +| User Group and Meetup Formation | Facilitate the creation of user groups and meetups globally. | Quarterly | Foster a sense of community, local engagement. | Stronger, localized community support networks. | +| Continuous Benchmarking | Regularly benchmark against competing frameworks. | Bi-annually | Stay competitive, identify improvement areas. | Framework optimization, staying ahead of competition. | + +This strategy aims to build a strong, engaged community around Zeta Framework, continuously improve and update the framework, and increase its visibility and credibility in both the academic and industrial sectors. Through these activities, the goal is to create a sustainable growth model that leverages the power of the open-source community. diff --git a/docs/corporate/main.md b/docs/corporate/main.md new file mode 100644 index 00000000..f9216596 --- /dev/null +++ b/docs/corporate/main.md @@ -0,0 +1,63 @@ +# **Zeta Mission Statement: Pioneering a Future Where AI is for Everyone** + + +--- + +**Introduction:** + +In an era where artificial intelligence is reshaping every facet of human life, Zeta Framework emerges as a beacon of empowerment and innovation. Our vision transcends the traditional boundaries of technology, envisioning a future where the transformative power of AI is a common tool, accessible and usable by all. Our mission is to demystify the complexities of AI model development, rendering it a straightforward, inclusive, and universally accessible endeavor. + +--- + +**Our Grand Purpose:** + +Zeta Framework is dedicated to a singular, noble purpose: to enable every individual, from the tech-savvy developer in Silicon Valley to the aspiring innovator in remote corners of the world, to create AI models that are not just efficient and effective, but also ethical and empowering. 
We are not just developing a technology; we are nurturing a vision to uplift humanity, bridge digital divides, and democratize the very essence of technological advancement. + +--- + +**Guiding Principles:** + +1. **Modularity: Embracing Diversity in Innovation** + - Our commitment to modularity is not just about technical flexibility; itâs about honoring the diverse needs and visions of our users. We provide a canvas where every stroke of innovation can find its space. + +2. **Extreme Reliability: A Foundation You Can Trust** + - Zeta Framework stands as a pillar of reliability. We understand that the backbone of impactful technology is trust, and we embed this trust in every line of code, ensuring that our framework is a dependable ally in your AI journey. + +3. **Bleeding Edge Performance: Pushing the Boundaries of the Possible** + - Our pursuit of bleeding-edge performance is relentless. We are constantly scouring the horizon for innovations, integrating them to ensure that our users are always equipped with the best tools to conquer the AI frontier. + +4. **Community Collaboration: Cultivating a Global AI Family** + - We believe in the power of collective intelligence. Our framework is a testament to the spirit of global collaboration, bringing together minds from across the globe to forge a path of shared growth and learning. + +5. **Ethical AI Development: Championing a Responsible Future** + - Our commitment to ethical AI is unwavering. We recognize the profound impact of AI on society and are dedicated to ensuring that our framework upholds the highest standards of fairness, transparency, and respect for human dignity. + +6. **Accessibility and Ease of Use: Making AI a Universal Language** + - We are steadfast in our mission to make AI as accessible as possible. Zeta Framework is designed to be intuitive, removing barriers and opening doors to a world where AI is a universal language, spoken and understood by all. + +7. **Continuous Learning and Improvement: Evolving with You** + - The journey of AI is one of perpetual evolution, and so is our framework. We are committed to a philosophy of continuous learning and improvement, ensuring that Zeta Framework not only adapts to the changing landscape of technology but also to the evolving needs of our users. + +8. **Inclusive Innovation: Building for a Diverse World** + - At Zeta, we recognize the rich tapestry of human diversity. Our framework is designed with an inclusive lens, ensuring that it caters to a wide spectrum of cultures, abilities, and backgrounds. + +9. **Sustainable Development: AI for a Greener Tomorrow** + - We acknowledge our responsibility towards the planet. Our commitment to sustainable AI development guides our operational and technological decisions, aiming to minimize environmental impact and promote sustainability. + +--- + +**Our Aspiration:** + +In embracing these principles, Zeta Framework aspires to be more than a technological solution; it aims to be a movement. A movement that heralds a new era where AI is not a privilege of the few but a right of the many. A movement that stands on the pillars of empowerment, equality, and ethical responsibility. We are not just building a framework; we are crafting the future of AI, a future where technology is an equal partner in human progress. 
+ +--- + +**Endorsement:** + +*With a Vision for Tomorrow,* +Kye Gomez, Supreme Leader of the Zeta Framework + +--- + +*Date:* December 17, 2023 + diff --git a/docs/purpose.md b/docs/corporate/purpose.md similarity index 100% rename from docs/purpose.md rename to docs/corporate/purpose.md diff --git a/docs/roadmap.md b/docs/corporate/roadmap.md similarity index 100% rename from docs/roadmap.md rename to docs/corporate/roadmap.md diff --git a/docs/corporate/zeta_cloud.md b/docs/corporate/zeta_cloud.md new file mode 100644 index 00000000..61cce3e1 --- /dev/null +++ b/docs/corporate/zeta_cloud.md @@ -0,0 +1,165 @@ +**Zeta Cloud: AI Model Training and Deployment Made Easy** + +--- + +**Description: What is it?** +Zeta Cloud is an innovative cloud-based service that simplifies the process of training and deploying AI models. By allowing AI engineers to simply specify the file they want to run, Zeta Cloud takes care of the rest - from model training on powerful cloud infrastructure to seamless deployment. + +--- + +**Problem: What problem is this solving?** +Many AI engineers and data scientists face significant hurdles in model training and deployment, including complexities in setting up infrastructure, managing resources, and ensuring scalability. Zeta Cloud addresses these challenges by providing a streamlined, efficient, and user-friendly platform. + +--- + +**Why: How do we know this is a real problem and worth solving?** +Feedback from the AI community, market research, and demand trends in cloud computing and AI as a Service (AIaaS) indicate a substantial need for simplified model training and deployment solutions. The growing adoption of AI across industries further validates this need. + +--- + +**Success: How do we know if weâve solved this problem?** +Success will be measured by user adoption rates, customer satisfaction scores, reduction in time and effort for model training and deployment, and positive feedback from the AI engineering community. + +--- + +**Audience: Who are we building for?** +Zeta Cloud is designed for AI engineers, data scientists, startups, and enterprises who want to focus on model development without the overhead of managing cloud infrastructure and deployment complexities. + +--- + +**What: Roughly, what does this look like in the product?** +In the product, users will find a straightforward interface where they can upload their AI model files and specify any required parameters. The platform then automatically allocates resources, trains the model, and deploys it, providing users with an endpoint for easy access and integration. + +--- + +**How: What is the experiment plan?** +The plan includes initial beta testing with select users, gathering feedback, and iteratively improving the service. A phased rollout will follow, starting with basic model training and deployment capabilities, gradually incorporating more advanced features based on user input and technological advancements. + +--- + +**When: When does it ship and what are the milestones?** +The estimated timeline for shipping Zeta Cloud is as follows: +- Beta Testing: Q1 2024 +- Initial Release: Q3 2024 +- Feature Expansion: Q1 2025 +- Full-Scale Deployment: Q3 2025 + +--- + +**Revenue Streams/Cashflows for Zeta Cloud:** + +| Revenue Stream | Description | Target Market | Pricing Model | +|----------------|-------------|---------------|---------------| +| Subscription for Basic Access | Access to basic model training and deployment capabilities. | Individual developers, small startups. 
| Monthly/Annual subscription. | +| Premium Subscription | Advanced features like higher computing resources, priority support, and more. | Mid-sized companies, enterprises. | Tiered monthly/annual subscription based on usage. | +| Pay-Per-Use Model | Charges based on the amount of computing resources used and the number of model deployments. | Businesses with variable usage. | Charged per resource unit or deployment. | +| Custom Solutions | Tailored solutions for unique business needs, including specialized support and infrastructure. | Large enterprises with specific requirements. | Custom pricing based on the scope of services. | +| Training and Consultation Services | Expert training and consultation for AI model development and deployment. | Organizations new to AI, enterprises needing expertise. | Fixed fee for services or packaged with premium subscriptions. | +| Marketplace for Pre-Trained Models | A platform for users to buy, sell, or license pre-trained models. | AI developers, companies looking for ready-to-use models. | Transaction fees, subscription for premium listings. | +| Data Storage and Management | Integrated solutions for data storage, processing, and management. | All users of the platform. | Based on the amount of data stored/processed. | +| API Access for Third-Party Integrations | Providing API access for integration with other tools and services. | Developers, businesses needing integrations. | Monthly/Annual subscription or pay-per-use. | + + + + +# GTM - Go To Market + +### **Contents** + +1. Positioning Statement +2. Early Adopter Segments +3. Branding +4. Channel Strategy +5. Initial Marketing Methods +6. Testing Plan +7. LTV/CAC + +--- + +### **1. Positioning Statement** + +*For AI engineers and data scientists who struggle with the complexities of model training and deployment, Zeta Cloud is a new cloud-based AI service that simplifies these processes. Unlike traditional cloud services, we offer an automated, user-friendly platform with a strong focus on accessibility and efficiency.* + +--- + +### **2. Early Adopter Segments** + +**Segment Characteristics:** +- Demographics: AI engineers and data scientists in mid-sized tech companies and startups. +- Unmet Needs: Simplification of AI model deployment, efficient resource management, cost-effective scaling. +- Behaviors: Active users of cloud computing services, frequent participants in tech forums and communities. +- Psychographics: Value innovation, efficiency, and user-friendly interfaces. +- Multi-party Decision Making: End users (engineers and scientists), economic buyers (company executives), and key influencers (tech thought leaders and industry experts). + +**Implications for Targeted Marketing:** +- Focused engagement in tech forums and communities. +- Tailored content marketing addressing specific needs and pain points. +- Leveraging influencers and thought leaders to reach decision-makers. + +--- + +### **3. Branding** + +**Strengths of Product Name:** +- 'Zeta Cloud' conveys a sense of technological advancement and cloud-based efficiency. + +**Brand Association Words:** +- Innovative, Efficient, User-Friendly, Accessible, Empowering, Reliable. + +**Aspirational Brand Similarities:** +- Brands like AWS, Google Cloud, and Azure for their technological prowess and market presence. + +--- + +### **4. Channel Strategy** + +**Channels:** +- Own Website: Primary channel for direct sales and customer engagement. 
+- Sales Force: Blend of inside sales for smaller accounts and field sales for larger, enterprise-level deals. +- Channel Partners: Collaborations with tech marketplaces and value-added resellers. + +**Partner Responsibilities and Margins:** +- Education and initial engagement by Zeta Cloud, with partners focusing on closing sales and after-sales service. +- Attractive margins to incentivize partner engagement and commitment. + +--- + +### **5. Initial Marketing Methods** + +**Hypothesized Effective Methods:** +1. **Content Marketing:** Strength - establishes thought leadership; Weakness - time-intensive. +2. **Social Media and Community Engagement:** Strength - builds brand awareness; Weakness - requires consistent, high-quality engagement. +3. **Paid Digital Advertising (e.g., Google Ads, LinkedIn):** Strength - targets specific segments; Weakness - can be costly. + +**Performance Metrics:** +- Engagement rates, conversion rates, customer acquisition costs. + +**Secondary Marketing Methods:** +- Email marketing, PR activities, and webinars; secondary due to longer lead times and higher resource requirements. + +--- + +### **6. Testing Plan** + +**Completed Tests:** +- Initial A/B testing on website messaging and layout. + +**Upcoming Tests:** +- Content marketing effectiveness: Measuring engagement and conversion rates from different content types. +- Social media ad campaigns: Assessing customer acquisition costs and conversion rates. +- Budget for tests: Approximately $20,000 over three months. + +--- + +### **7. LTV/CAC** + +**LTV Targets:** +- Average annual revenue per customer: $5,000. +- Variable contribution margin: 70%. +- Retention rate: 85% annually. + +**CAC Projections:** +- Mix of free and paid methods: 40% free methods (referrals), 60% paid methods. +- Viral coefficient: 0.5. +- CAC for paid methods: $500 - $1,000, varying by channel. + diff --git a/docs/docs_prompt.md b/docs/docs_prompt.md deleted file mode 100644 index 9dfe8fe5..00000000 --- a/docs/docs_prompt.md +++ /dev/null @@ -1,94 +0,0 @@ -Create multi-page long and explicit professional pytorch-like documentation for the Zeta framework below follow the outline for the zeta library, provide many examples and teach the user about the code, provide examples for every function, make the documentation 10,000 words, provide many usage examples and notes this markdown docs - -Now make the professional documentation for this code, provide the architecture and how the class works and why it works that way, it's purpose, provide args, their types, 3 ways of usage examples, in examples use from shapeless import x - -BE VERY EXPLICIT AND THOROUGH, MAKE IT DEEP AND USEFUL - -######## -Step 1: Understand the purpose and functionality of the module or framework - -Read and analyze the description provided in the documentation to understand the purpose and functionality of the module or framework. -Identify the key features, parameters, and operations performed by the module or framework. -Step 2: Provide an overview and introduction - -Start the documentation by providing a brief overview and introduction to the module or framework. -Explain the importance and relevance of the module or framework in the context of the problem it solves. -Highlight any key concepts or terminology that will be used throughout the documentation. -Step 3: Provide a class or function definition - -Provide the class or function definition for the module or framework. 
-Include the parameters that need to be passed to the class or function and provide a brief description of each parameter. -Specify the data types and default values for each parameter. -Step 4: Explain the functionality and usage - -Provide a detailed explanation of how the module or framework works and what it does. -Describe the steps involved in using the module or framework, including any specific requirements or considerations. -Provide code examples to demonstrate the usage of the module or framework. -Explain the expected inputs and outputs for each operation or function. -Step 5: Provide additional information and tips - -Provide any additional information or tips that may be useful for using the module or framework effectively. -Address any common issues or challenges that developers may encounter and provide recommendations or workarounds. -Step 6: Include references and resources - -Include references to any external resources or research papers that provide further information or background on the module or framework. -Provide links to relevant documentation or websites for further exploration. -Example Template for the given documentation: - -# Module/Function Name: MultiheadAttention - -class torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None): - """ - Creates a multi-head attention module for joint information representation from the different subspaces. - - Parameters: - - embed_dim (int): Total dimension of the model. - - num_heads (int): Number of parallel attention heads. The embed_dim will be split across num_heads. - - dropout (float): Dropout probability on attn_output_weights. Default: 0.0 (no dropout). - - bias (bool): If specified, adds bias to input/output projection layers. Default: True. - - add_bias_kv (bool): If specified, adds bias to the key and value sequences at dim=0. Default: False. - - add_zero_attn (bool): If specified, adds a new batch of zeros to the key and value sequences at dim=1. Default: False. - - kdim (int): Total number of features for keys. Default: None (uses kdim=embed_dim). - - vdim (int): Total number of features for values. Default: None (uses vdim=embed_dim). - - batch_first (bool): If True, the input and output tensors are provided as (batch, seq, feature). Default: False. - - device (torch.device): If specified, the tensors will be moved to the specified device. - - dtype (torch.dtype): If specified, the tensors will have the specified dtype. - """ - - def forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False): - """ - Forward pass of the multi-head attention module. - - Parameters: - - query (Tensor): Query embeddings of shape (L, E_q) for unbatched input, (L, N, E_q) when batch_first=False, or (N, L, E_q) when batch_first=True. - - key (Tensor): Key embeddings of shape (S, E_k) for unbatched input, (S, N, E_k) when batch_first=False, or (N, S, E_k) when batch_first=True. - - value (Tensor): Value embeddings of shape (S, E_v) for unbatched input, (S, N, E_v) when batch_first=False, or (N, S, E_v) when batch_first=True. - - key_padding_mask (Optional[Tensor]): If specified, a mask indicating elements to be ignored in key for attention computation. - - need_weights (bool): If specified, returns attention weights in addition to attention outputs. Default: True. 
- - attn_mask (Optional[Tensor]): If specified, a mask preventing attention to certain positions. - - average_attn_weights (bool): If true, returns averaged attention weights per head. Otherwise, returns attention weights separately per head. Note that this flag only has an effect when need_weights=True. Default: True. - - is_causal (bool): If specified, applies a causal mask as the attention mask. Default: False. - - Returns: - Tuple[Tensor, Optional[Tensor]]: - - attn_output (Tensor): Attention outputs of shape (L, E) for unbatched input, (L, N, E) when batch_first=False, or (N, L, E) when batch_first=True. - - attn_output_weights (Optional[Tensor]): Attention weights of shape (L, S) when unbatched or (N, L, S) when batched. Optional, only returned when need_weights=True. - """ - - # Implementation of the forward pass of the attention module goes here - - return attn_output, attn_output_weights - - -# Usage example: - -multihead_attn = nn.MultiheadAttention(embed_dim, num_heads) -attn_output, attn_output_weights = multihead_attn(query, key, value) -Note: - -The above template includes the class or function definition, parameters, description, and usage example. -To replicate the documentation for any other module or framework, follow the same structure and provide the specific details for that module or framework. - - -############# CODE TO DOCUMENt -* \ No newline at end of file diff --git a/docs/examples/count-tokens.md b/docs/examples/count-tokens.md deleted file mode 100644 index 2ad237ad..00000000 --- a/docs/examples/count-tokens.md +++ /dev/null @@ -1,29 +0,0 @@ -To count tokens you can use Zeta events and the `TokenCounter` util: - -```python -from zeta import utils -from zeta.events import ( - StartPromptEvent, FinishPromptEvent, -) -from zeta.structures import Agent - - -token_counter = utils.TokenCounter() - -agent = Agent( - event_listeners={ - StartPromptEvent: [ - lambda e: token_counter.add_tokens(e.token_count) - ], - FinishPromptEvent: [ - lambda e: token_counter.add_tokens(e.token_count) - ], - } -) - -agent.run("tell me about large language models") -agent.run("tell me about GPT") - -print(f"total tokens: {token_counter.tokens}") - -``` \ No newline at end of file diff --git a/docs/examples/load-and-query-pinecone.md b/docs/examples/load-and-query-pinecone.md deleted file mode 100644 index 18f7cd71..00000000 --- a/docs/examples/load-and-query-pinecone.md +++ /dev/null @@ -1,49 +0,0 @@ -```python -import hashlib -import json -from urllib.request import urlopen -from decouple import config -from zeta.drivers import PineconeVectorStoreDriver - - -def load_data(driver: PineconeVectorStoreDriver) -> None: - response = urlopen( - "https://raw.githubusercontent.com/wedeploy-examples/" - "supermarket-web-example/master/products.json" - ) - - for product in json.loads(response.read()): - driver.upsert_text( - product["description"], - vector_id=hashlib.md5(product["title"].encode()).hexdigest(), - meta={ - "title": product["title"], - "description": product["description"], - "type": product["type"], - "price": product["price"], - "rating": product["rating"] - }, - namespace="supermarket-products" - ) - - -vector_driver = PineconeVectorStoreDriver( - api_key=config("PINECONE_API_KEY"), - environment=config("PINECONE_ENVIRONMENT"), - index_name=config("PINECONE_INDEX_NAME") -) - -load_data(vector_driver) - -result = vector_driver.query( - "fruit", - count=3, - filter={ - "price": {"$lte": 15}, - "rating": {"$gte": 4} - }, - namespace="supermarket-products" -) - -print(result) -``` 
\ No newline at end of file diff --git a/docs/examples/load-query-and-chat-marqo.md b/docs/examples/load-query-and-chat-marqo.md deleted file mode 100644 index edaa5076..00000000 --- a/docs/examples/load-query-and-chat-marqo.md +++ /dev/null @@ -1,51 +0,0 @@ -```python -from zeta import utils -from zeta.drivers import MarqoVectorStoreDriver -from zeta.engines import VectorQueryEngine -from zeta.loaders import WebLoader -from zeta.structures import Agent -from zeta.tools import KnowledgeBaseClient -import openai -from marqo import Client - -# Set the OpenAI API key -openai.api_key_path = "../openai_api_key.txt" - -# Define the namespace -namespace = "kyegomez" - -# Initialize the vector store driver -vector_store = MarqoVectorStoreDriver( - api_key=openai.api_key_path, - url="http://localhost:8882", - index="chat2", - mq=Client(api_key="foobar", url="http://localhost:8882") -) - -# Get a list of all indexes -#indexes = vector_store.get_indexes() -#print(indexes) - -# Initialize the query engine -query_engine = VectorQueryEngine(vector_store_driver=vector_store) - -# Initialize the knowledge base tool -kb_tool = KnowledgeBaseClient( - description="Contains information about the Zeta Framework from www.zeta.ai", - query_engine=query_engine, - namespace=namespace -) - -# Load artifacts from the web -artifacts = WebLoader(max_tokens=200).load("https://www.zeta.ai") - -# Upsert the artifacts into the vector store -vector_store.upsert_text_artifacts({namespace: artifacts,}) - -# Initialize the agent -agent = Agent(tools=[kb_tool]) - -# Start the chat -utils.Chat(agent).start() - -``` \ No newline at end of file diff --git a/docs/examples/query-webpage.md b/docs/examples/query-webpage.md deleted file mode 100644 index 0171f02e..00000000 --- a/docs/examples/query-webpage.md +++ /dev/null @@ -1,23 +0,0 @@ -```python -from zeta.artifacts import BaseArtifact -from zeta.drivers import LocalVectorStoreDriver -from zeta.loaders import WebLoader - - -vector_store = LocalVectorStoreDriver() - -[ - vector_store.upsert_text_artifact(a, namespace="zeta") - for a in WebLoader(max_tokens=100).load("https://www.zeta.ai") -] - -results = vector_store.query( - "creativity", - count=3, - namespace="zeta" -) - -values = [BaseArtifact.from_json(r.meta["artifact"]).value for r in results] - -print("\n\n".join(values)) -``` \ No newline at end of file diff --git a/docs/examples/store-conversation-memory-in-dynamodb.md b/docs/examples/store-conversation-memory-in-dynamodb.md deleted file mode 100644 index bb3be374..00000000 --- a/docs/examples/store-conversation-memory-in-dynamodb.md +++ /dev/null @@ -1,47 +0,0 @@ -To store your conversation on DynamoDB you can use DynamoDbConversationMemoryDriver. 
-```python -from zeta.memory.structure import ConversationMemory -from zeta.memory.structure import ConversationMemoryElement, Turn, Message -from zeta.drivers import DynamoDbConversationMemoryDriver - -# Instantiate DynamoDbConversationMemoryDriver -dynamo_driver = DynamoDbConversationMemoryDriver( - aws_region="us-east-1", - table_name="conversations", - partition_key="convo_id", - value_attribute_key="convo_data", - partition_key_value="convo1" -) - -# Create a ConversationMemory structure -conv_mem = ConversationMemory( - turns=[ - Turn( - turn_index=0, - system=Message("Hello"), - user=Message("Hi") - ), - Turn( - turn_index=1, - system=Message("How can I assist you today?"), - user=Message("I need some information") - ) - ], - latest_turn=Turn( - turn_index=2, - system=Message("Sure, what information do you need?"), - user=None # user has not yet responded - ), - driver=dynamo_driver # set the driver -) - -# Store the conversation in DynamoDB -dynamo_driver.store(conv_mem) - -# Load the conversation from DynamoDB -loaded_conv_mem = dynamo_driver.load() - -# Display the loaded conversation -print(loaded_conv_mem.to_json()) - -``` \ No newline at end of file diff --git a/docs/examples/talk-to-a-pdf.md b/docs/examples/talk-to-a-pdf.md deleted file mode 100644 index bf74062d..00000000 --- a/docs/examples/talk-to-a-pdf.md +++ /dev/null @@ -1,37 +0,0 @@ -This example demonstrates how to vectorize a PDF of the [Attention Is All You Need](https://arxiv.org/pdf/1706.03762.pdf) paper and setup a Zeta agent with rules and the `KnowledgeBase` tool to use it during conversations. - -```python -import io -import requests -from zeta.engines import VectorQueryEngine -from zeta.loaders import PdfLoader -from zeta.structures import Agent -from zeta.tools import KnowledgeBaseClient -from zeta.utils import Chat - -namespace = "attention" - -response = requests.get("https://arxiv.org/pdf/1706.03762.pdf") -engine = VectorQueryEngine() - -engine.vector_store_driver.upsert_text_artifacts( - { - namespace: PdfLoader().load( - io.BytesIO(response.content) - ) - } -) - -kb_client = KnowledgeBaseClient( - description="Contains information about the Attention Is All You Need paper. " - "Use it to answer any related questions.", - query_engine=engine, - namespace=namespace -) - -agent = Agent( - tools=[kb_client] -) - -Chat(agent).start() -``` \ No newline at end of file diff --git a/docs/examples/talk-to-a-webpage.md b/docs/examples/talk-to-a-webpage.md deleted file mode 100644 index 229531a4..00000000 --- a/docs/examples/talk-to-a-webpage.md +++ /dev/null @@ -1,50 +0,0 @@ -This example demonstrates how to vectorize a webpage and setup a Zeta agent with rules and the `KnowledgeBase` tool to use it during conversations. - -```python -from zeta.engines import VectorQueryEngine -from zeta.loaders import WebLoader -from zeta.rules import Ruleset, Rule -from zeta.structures import Agent -from zeta.tools import KnowledgeBaseClient -from zeta.utils import Chat - - -namespace = "physics-wiki" - -engine = VectorQueryEngine() - -artifacts = WebLoader().load( - "https://en.wikipedia.org/wiki/Physics" -) - -engine.vector_store_driver.upsert_text_artifacts( - {namespace: artifacts} -) - - -kb_client = KnowledgeBaseClient( - description="Contains information about physics. 
" - "Use it to answer any physics-related questions.", - query_engine=engine, - namespace=namespace -) - -agent = Agent( - rulesets=[ - Ruleset( - name="Physics Tutor", - rules=[ - Rule( - "Always introduce yourself as a physics tutor" - ), - Rule( - "Be truthful. Only discuss physics." - ) - ] - ) - ], - tools=[kb_client] -) - -Chat(agent).start() -``` \ No newline at end of file diff --git a/docs/examples/talk-to-redshift.md b/docs/examples/talk-to-redshift.md deleted file mode 100644 index fc4fe4d6..00000000 --- a/docs/examples/talk-to-redshift.md +++ /dev/null @@ -1,46 +0,0 @@ -This example demonstrates how to build an agent that can dynamically query Amazon Redshift Serverless tables and store its contents on the local hard drive. - -Let's build a support agent that uses GPT-4: - -```python -import boto3 -from zeta.drivers import AmazonRedshiftSqlDriver, OpenAiPromptDriver -from zeta.loaders import SqlLoader -from zeta.rules import Ruleset, Rule -from zeta.structures import Agent -from zeta.tools import SqlClient, FileManager -from zeta.utils import Chat - -session = boto3.Session(region_name="REGION_NAME") - -sql_loader = SqlLoader( - sql_driver=AmazonRedshiftSqlDriver( - database="DATABASE", - session=session, - workgroup_name="WORKGROUP_NAME" - ) -) - -sql_tool = SqlClient( - sql_loader=sql_loader, - table_name="people", - table_description="contains information about tech industry professionals", - engine_name="redshift" -) - -agent = Agent( - tools=[sql_tool, FileManager())], - rulesets=[ - Ruleset( - name="HumansOrg Agent", - rules=[ - Rule("Act and introduce yourself as a HumansOrg, Inc. support agent"), - Rule("Your main objective is to help with finding information about people"), - Rule("Only use information about people from the sources available to you") - ] - ) - ] -) - -Chat(agent).start() -``` diff --git a/docs/examples/torch_cs.md b/docs/examples/torch_cs.md new file mode 100644 index 00000000..e6a96d5d --- /dev/null +++ b/docs/examples/torch_cs.md @@ -0,0 +1,16 @@ +# Pytorch Hyper-Optimization +A list of hyper-optimized PyTorch features, such as `torch.compile`, `torch.dynamo`, and other modules and decorators, is a great idea for quick reference. Below is a table that includes a description, use case, and an example for each feature: + +| Feature | Description | Use Case | Python Example | +| ------- | ----------- | -------- | -------------- | +| `torch.compile` | Converts standard PyTorch code into a fused, optimized form. | Use to optimize PyTorch models for faster inference and sometimes training, by fusing operations and eliminating Python overhead. | `@torch.compile`+ + + +
+
+
+| Argument | Type | Default Value | Description |
+| --- | --- | --- | --- |
+| `layer` | `torch.nn.Module` | None | The layer whose weight and bias tensors will be zeroed in place. |
+
+
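For orientation, this is roughly what an in-place zero initializer with this signature usually looks like; it is a sketch inferred from the table above, not the actual zeta implementation:

```python
import torch.nn as nn


def init_zero_(layer: nn.Module) -> None:
    """Zero out a layer's weight and bias tensors in place (illustrative sketch)."""
    nn.init.zeros_(layer.weight)
    if getattr(layer, "bias", None) is not None:
        nn.init.zeros_(layer.bias)
```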
+
+
+Before we proceed, let us first import the required modules and dependencies.
+
+```python
+import torch
+from torch import nn
+
+from zeta.utils import exists, init_zero_
+```
+
+**Example 1: Initializing a Single Linear Layer**
+
+```python
+# Create a single linear layer
+layer = nn.Linear(10, 5)
+
+# Initialize weights and bias to zero
+init_zero_(layer)
+
+print("Weights:", layer.weight)
+print("Bias:", layer.bias)
+```
+
+In this example, you can observe that after applying `init_zero_()`, all the weights and biases of the layer are initialized to zero.
+
+**Example 2: Initializing All Layers in a Neural Network Model**
+
+```python
+# Create a simple neural network
+model = nn.Sequential(nn.Linear(10, 5), nn.ReLU(), nn.Linear(5, 1))
+
+# Loop through each layer in the model
+for layer in model:
+    # Check whether the layer has a weight attribute, i.e., is an nn.Linear layer
+    if exists(getattr(layer, "weight", None)):
+        init_zero_(layer)
+
+# Check weights of first layer
+print("Weights of First Layer:", model[0].weight)
+print("Bias of First Layer:", model[0].bias)
+
+# Check weights of third layer
+print("Weights of Third Layer:", model[2].weight)
+print("Bias of Third Layer:", model[2].bias)
+```
+
+In this example, `init_zero_` is used to initialize all the weights and biases in a neural network model to zero.
+
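If you prefer an explicit type check over attribute probing, the same result can be reached with `isinstance`; this variant is a small editorial sketch reusing the same `init_zero_` helper:

```python
# Apply the initializer to every nn.Linear module, including nested ones.
for module in model.modules():
    if isinstance(module, nn.Linear):
        init_zero_(module)

# Sanity check: all linear weights and biases should now be exactly zero.
for module in model.modules():
    if isinstance(module, nn.Linear):
        assert not module.weight.any() and not module.bias.any()
```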
+") or None - self.middle_id: Optional[int] = self.sp_model.piece_to_id("â") or None - self.suffix_id: Optional[int] = self.sp_model.piece_to_id("â ") or None + self.prefix_id: Optional[int] = ( + self.sp_model.piece_to_id("â ") or None + ) + self.middle_id: Optional[int] = ( + self.sp_model.piece_to_id("â") or None + ) + self.suffix_id: Optional[int] = ( + self.sp_model.piece_to_id("â ") or None + ) self.eot_id: Optional[int] = self.sp_model.piece_to_id("â ") or None logger.info( - f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id} " - f"- PRE ID: {self.prefix_id} - MID ID: {self.middle_id} - SUF ID: {self.suffix_id} - EOT ID: {self.eot_id}" + f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID:" + f" {self.eos_id} - PRE ID: {self.prefix_id} - MID ID:" + f" {self.middle_id} - SUF ID: {self.suffix_id} - EOT ID:" + f" {self.eot_id}" ) assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() def encode(self, s: str, bos: bool, eos: bool) -> List[int]: + """ + Encodes a given string using the SentencePiece tokenizer. + + Args: + s (str): The input string to be encoded. + bos (bool): Whether to add a beginning of sentence token. + eos (bool): Whether to add an end of sentence token. + + Returns: + List[int]: The list of encoded tokens. + + """ assert isinstance(s, str) t = self.sp_model.encode(s) if bos: @@ -59,6 +78,14 @@ def encode(self, s: str, bos: bool, eos: bool) -> List[int]: return t def decode(self, t: List[int]) -> str: + """Decode a list of token IDs into a string. + + Args: + t (List[int]): _description_ + + Returns: + str: _description_ + """ return self.sp_model.decode(t) def encode_infilling(self, s: str) -> List[int]: diff --git a/zeta/tokenizers/tiktoken.py b/zeta/tokenizers/tiktoken.py deleted file mode 100644 index 38bca205..00000000 --- a/zeta/tokenizers/tiktoken.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Optional - -import tiktoken -from attr import define, field -from zeta.tokenizers.base import BaseTokenizer - - -@define(frozen=True) -class TikToken(BaseTokenizer): - DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = "text-davinci-003" - DEFAULT_OPENAI_GPT_3_CHAT_MODEL = "gpt-3.5-turbo" - DEFAULT_OPENAI_GPT_4_MODEL = "gpt-4" - DEFAULT_ENCODING = "cl100k_base" - DEFAULT_MAX_TOKENS = 2049 - TOKEN_OFFSET = 8 - - MODEL_PREFIXES_TO_MAX_TOKENS = { - "gpt-4-32k": 32768, - "gpt-4": 8192, - "gpt-3.5-turbo-16k": 16384, - "gpt-3.5-turbo": 4096, - "gpt-35-turbo-16k": 16384, - "gpt-35-turbo": 4096, - "text-davinci-003": 4097, - "text-davinci-002": 4097, - "code-davinci-002": 8001, - "text-embedding-ada-002": 8191, - "text-embedding-ada-001": 2046, - } - - EMBEDDING_MODELS = ["text-embedding-ada-002", "text-embedding-ada-001"] - - model: str = field(default=DEFAULT_OPENAI_GPT_3_CHAT_MODEL, kw_only=True) - - @property - def encoding(self) -> tiktoken.Encoding: - try: - return tiktoken.encoding_for_model(self.model) - except KeyError: - return tiktoken.get_encoding(self.DEFAULT_ENCODING) - - @property - def max_tokens(self) -> int: - tokens = next( - v - for k, v in self.MODEL_PREFIXES_TO_MAX_TOKENS.items() - if self.model.startswith(k) - ) - offset = 0 if self.model in self.EMBEDDING_MODELS else self.TOKEN_OFFSET - - return (tokens if tokens else self.DEFAULT_MAX_TOKENS) - offset - - def encode(self, text: str) -> list[int]: - return self.encoding.encode(text, allowed_special=set(self.stop_sequences)) - - def decode(self, tokens: list[int]) -> str: - return self.encoding.decode(tokens) - - def 
tokens_left(self, text: str | list) -> int: - return super().tokens_left(text) - - def token_count(self, text: str | list, model: Optional[str] = None) -> int: - """ - Handles the special case of ChatML. Implementation adopted from the official OpenAI notebook: - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb - """ - if isinstance(text, list): - model = model if model else self.model - - try: - encoding = tiktoken.encoding_for_model(model) - except KeyError: - logging.warning("model not found. Using cl100k_base encoding.") - - encoding = tiktoken.get_encoding("cl100k_base") - - if model in { - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - "gpt-4-0314", - "gpt-4-32k-0314", - "gpt-4-0613", - "gpt-4-32k-0613", - }: - tokens_per_message = 3 - tokens_per_name = 1 - elif model == "gpt-3.5-turbo-0301": - # every message follows - # <|start|>{role/name}\n{content}<|end|>\n - tokens_per_message = 4 - # if there's a name, the role is omitted - tokens_per_name = -1 - elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model: - logging.info( - "gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613." - ) - return self.token_count(text, model="gpt-3.5-turbo-0613") - elif "gpt-4" in model: - logging.info( - "gpt-4 may update over time. Returning num tokens assuming gpt-4-0613." - ) - return self.token_count(text, model="gpt-4-0613") - else: - raise NotImplementedError( - f"""token_count() is not implemented for model {model}. - See https://github.com/openai/openai-python/blob/main/chatml.md for - information on how messages are converted to tokens.""" - ) - - num_tokens = 0 - - for message in text: - num_tokens += tokens_per_message - for key, value in message.items(): - num_tokens += len(encoding.encode(value)) - if key == "name": - num_tokens += tokens_per_name - - # every reply is primed with <|start|>assistant<|message|> - num_tokens += 3 - - return num_tokens - else: - return super().token_count(text) diff --git a/zeta/tokenizers/tokenmonster.py b/zeta/tokenizers/tokenmonster.py index 8b52c739..b6302b4a 100644 --- a/zeta/tokenizers/tokenmonster.py +++ b/zeta/tokenizers/tokenmonster.py @@ -1,4 +1,3 @@ -import numpy as np import tokenmonster @@ -226,7 +225,11 @@ def modify( int: The new size of the vocabulary. 
""" return self.vocab.modify( - add_special_tokens, add_regular_tokens, delete_tokens, resize, change_unk + add_special_tokens, + add_regular_tokens, + delete_tokens, + resize, + change_unk, ) def add_token(self, token): diff --git a/zeta/training/__init__.py b/zeta/training/__init__.py index 970f592c..d54e6855 100644 --- a/zeta/training/__init__.py +++ b/zeta/training/__init__.py @@ -1,10 +1,9 @@ # training -from zeta.training.train import Trainer, train from zeta.training.dataloader import build_dataloaders, build_pre_tokenized from zeta.training.fsdp import fsdp -from zeta.training.scheduler import get_lr_scheduler_with_warmup from zeta.training.parallel_wrapper import ParallelWrapper - +from zeta.training.scheduler import get_lr_scheduler_with_warmup +from zeta.training.train import Trainer, train __all__ = [ "Trainer", diff --git a/zeta/training/activation_checkpoint.py b/zeta/training/activation_checkpoint.py index 0c251e94..dc46e277 100644 --- a/zeta/training/activation_checkpoint.py +++ b/zeta/training/activation_checkpoint.py @@ -1,15 +1,113 @@ +import functools +import typing from functools import partial import torch from accelerate import Accelerator - - from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( CheckpointImpl, - apply_activation_checkpointing, checkpoint_wrapper, ) +try: + from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( + apply_activation_checkpointing, + ) +except ModuleNotFoundError: + # let's patch the error. + import torch.distributed.algorithms._checkpoint.checkpoint_wrapper + + def lambda_auto_wrap_policy( + module: torch.nn.Module, + recurse: bool, + unwrapped_params: int, + lambda_fn: typing.Callable, + ) -> bool: + """ + A convenient auto wrap policy to wrap submodules based on an arbitrary user + function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as + a `wrapper_cls` unit. + + Return if a module should be wrapped during auto wrapping. + + The first three parameters are required by :func:`_recursive_wrap`. + + Args: + module (nn.Module): + The module to be considered in this decision. + recurse (bool): + Indicate if this is called to make a decision on whether we + should recurse down a subgraph of the module structure. + If False, it means this function is called to make a decision + on whether we should wrap the said module. + unwrapped_params (int): + The number of parameters yet to be wrapped in this module. + + lambda_fn (Callable[nn.Module] -> bool): + If this returns ``True``, this module will be wrapped by + wrapper_cls individually. + """ + if recurse: + # always recurse + return True + else: + # if not recursing, decide whether we should wrap for the leaf node or reminder + return lambda_fn(module) + + def apply_activation_checkpointing_wrapper( + model, + checkpoint_wrapper_fn=torch.distributed.algorithms._checkpoint.checkpoint_wrapper.checkpoint_wrapper, + check_fn=lambda _: True, + ): + """ + Applies :func:`checkpoint_wrapper` to modules within `model` based on a user-defined + configuration. For each module within `model`, the `check_fn` is used to decide + whether `module` should be wrapped with :func:`checkpoint_wrapper` or not. + + Note:: + This function modifies `model` in place and replaces appropriate layers with + their checkpoint-wrapped modules. + Note:: + This function will not wrap the overall root module. If this is needed, please directly use + :class:`CheckpointWrapper`. 
+ Usage:: + model = nn.Sequential( + nn.Linear(10, 10), nn.Linear(10, 10), nn.Linear(10, 10) + ) + check_fn = lambda l: isinstance(l, nn.Linear) + apply_activation_checkpointing(model, checkpoint_wrapper_fn=checkpoint_wrapper, check_fn=check_fn) + Args: + module (nn.Module): + The model who's submodules (or self) should be wrapped with activation checkpointing. + checkpoint_wrapper_fn (Optional[Callable[nn.Module]]) + A `Callable` which will wrap modules + check_fn (Optional[Callable[nn.Module, nn.Module]]) + A lambda function which will be passed current layer and returns + ``True`` or ``False`` depending on whether input layer should be wrapped. + Returns: None (`model` is modified inplace) + """ + # TODO: Importing inside function to avoid circular import issue between FSDP and + # checkpoint_wrapper. This can be resolved once wrap() APIs are decoupled from FSDP code. + from torch.distributed.fsdp.wrap import _recursive_wrap + + return _recursive_wrap( + module=model, + auto_wrap_policy=functools.partial( + lambda_auto_wrap_policy, lambda_fn=check_fn + ), + wrapper_cls=checkpoint_wrapper_fn, + ignored_modules=set(), + ignored_params=set(), + only_wrap_children=True, + ) + + setattr( + torch.distributed.algorithms._checkpoint.checkpoint_wrapper, + "apply_activation_checkpointing", + apply_activation_checkpointing_wrapper, + ) + apply_activation_checkpointing = apply_activation_checkpointing_wrapper + def activation_checkpointing( model: torch.nn.Module, diff --git a/zeta/training/dataloader.py b/zeta/training/dataloader.py index add5ed2a..447799ad 100644 --- a/zeta/training/dataloader.py +++ b/zeta/training/dataloader.py @@ -1,4 +1,5 @@ from itertools import chain + from datasets import load_dataset from transformers import AutoTokenizer @@ -20,7 +21,9 @@ def build_dataloaders(seq_len: int = None, num_cpu: int = None): dataset = load_dataset("openwebtext", split="train") tokenized_dataset = dataset.map( - lambda example: tokenizer([t + tokenizer.eos_token for t in example["text"]]), + lambda example: tokenizer( + [t + tokenizer.eos_token for t in example["text"]] + ), batched=True, num_proc=seq_len, remove_columns=["text"], @@ -32,7 +35,9 @@ def build_dataloaders(seq_len: int = None, num_cpu: int = None): # dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = { + k: list(chain(*examples[k])) for k in examples.keys() + } total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. @@ -40,7 +45,10 @@ def group_texts(examples): total_length = (total_length // block_size) * block_size # Split by chunks of max_len. result = { - k: [t[i : i + block_size] for i in range(0, total_length, block_size)] + k: [ + t[i : i + block_size] + for i in range(0, total_length, block_size) + ] for k, t in concatenated_examples.items() } return result diff --git a/zeta/training/fsdp.py b/zeta/training/fsdp.py index 4d203151..6c9afe35 100644 --- a/zeta/training/fsdp.py +++ b/zeta/training/fsdp.py @@ -2,13 +2,11 @@ import torch from torch.distributed.fsdp import ( + BackwardPrefetch, FullyShardedDataParallel, MixedPrecision, - BackwardPrefetch, ShardingStrategy, ) - - from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy @@ -71,9 +69,8 @@ def fsdp( ) else: raise ValueError( - "Invalid scheduler_type. 
Expected 'bf16', 'fp16' or 'fp32', got: {}".format( - mp - ) + "Invalid scheduler_type. Expected 'bf16', 'fp16' or 'fp32', got:" + f" {mp}" ) if shard_strat == "SHARD_GRAD": @@ -84,9 +81,8 @@ def fsdp( sharding_strat_fsdp = ShardingStrategy.NO_SHARD else: raise ValueError( - "Invalid scheduler_type. Expected 'SHARD_GRAD', 'FULL_SHARD' or 'NO_SHARD', got: {}".format( - shard_strat - ) + "Invalid scheduler_type. Expected 'SHARD_GRAD', 'FULL_SHARD' or" + " 'NO_SHARD', got: {}".format(shard_strat) ) model = FullyShardedDataParallel( diff --git a/zeta/training/galore.py b/zeta/training/galore.py new file mode 100644 index 00000000..afe2df1c --- /dev/null +++ b/zeta/training/galore.py @@ -0,0 +1,89 @@ +import torch +from torch import nn +from typing import Tuple, Iterable + + +class GaloreOptimizer(torch.optim.Optimizer): + def __init__( + self, + model: nn.Module, + optimizer: torch.optim.Optimizer, + criterion: nn.Module, + device: torch.device, + model_dim: int, + compact_dim: int, + params: Iterable[torch.Tensor], + lr: float = 0.002, + weight_decay: float = 0.2, + betas: Tuple[float, float] = (0.9, 0.99), + eps: float = 1e-8, + clip_thresh: float = 1.0, + precision: str = "amp_bfloat16", + custom_scalar: int = 65536, + ) -> None: + super(GaloreOptimizer, self).__init__( + params, + dict( + lr=lr, weight_decay=weight_decay, beta1=betas[0], beta2=betas[1] + ), + ) + self.model = model + self.optimizer = optimizer + self.criterion = criterion + self.device = device + self.eps = eps + self.d = clip_thresh + self.precision = precision + self.custom_scaler = custom_scalar + # Initialize the projection and back projection layers + self.proj = nn.Linear(model_dim, compact_dim).to(device) + self.back_proj = nn.Linear(compact_dim, model_dim).to(device) + for group in self.param_groups: + group["step"] = 1.0 + print("Using StableAdamWUnfused-v1") + + def step(self, closure=None): + """Performs a single optimization step (parameter update).""" + if closure is not None: + closure_result = closure() + + for group in self.param_groups: + lr = group["lr"] + group["weight_decay"] + group["beta1"] + group["beta2"] + group["step"] + + for p in group["params"]: + if p.grad is None: + continue + # Original gradient + g = p.grad.data + if self.precision == "custom_fp16": + g = g / self.custom_scaler + if torch.any(torch.isnan(g) | torch.isinf(g)): + continue + + # Projection to compact space + g_compact = self.proj(g.view(1, -1)).view_as(g) + + # Here you can include the update logic (e.g., Adam, SGD) applied on `g_compact` + # For simplicity, let's use a simplified update rule directly on the compact representation + # Note: This is where you'd typically integrate with self.optimizer logic for a real implementation + # Assuming g_compact has been obtained from the projection of gradients + lr = group["lr"] + + # Simplified update rule (akin to SGD) in compact space + update_compact = -lr * g_compact + + # Back-projection to original space for applying the update + update_original = self.back_proj( + update_compact.view(1, -1) + ).view_as(g) + + # Apply update to the parameters + p.data.add_(update_original) + + group["step"] += 1 + + return closure_result if closure is not None else None diff --git a/zeta/training/hive_trainer.py b/zeta/training/hive_trainer.py index 42d75528..b29675de 100644 --- a/zeta/training/hive_trainer.py +++ b/zeta/training/hive_trainer.py @@ -17,9 +17,8 @@ """ -import torch -import torch.distributed as dist import threading + from zeta.training.train import Trainer @@ -144,7 +143,9 @@ 
def train( "seq_len": self.seq_len, "entity_name": self.entity_name, "use_fsdp": self.use_fsdp, - "use_activation_checkpointing": self.use_activation_checkpointing, + "use_activation_checkpointing": ( + self.use_activation_checkpointing + ), "learning_rate": self.learning_rate, "seed": self.seed, "use_pretokenized": self.use_pretokenized, @@ -169,7 +170,6 @@ def train( # # Instantiate models # models = [YourModelClass1(), YourModelClass2()] # Replace with your model classes - # # Instantiate HiveTrainer and begin training # hive_trainer = HiveTrainer( # models=models, diff --git a/zeta/training/scheduler.py b/zeta/training/scheduler.py index 509dbab8..d715108b 100644 --- a/zeta/training/scheduler.py +++ b/zeta/training/scheduler.py @@ -1,7 +1,5 @@ import torch from accelerate import Accelerator - - from transformers import ( get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup, diff --git a/zeta/training/train.py b/zeta/training/train.py index 1bf4a52a..ec8c86c7 100644 --- a/zeta/training/train.py +++ b/zeta/training/train.py @@ -17,28 +17,71 @@ def print_num_params(model, accelerator: Accelerator): + """Print number of parameters in model""" # n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) accelerator.print(f"Number of parameters in model: {n_params}") def Trainer( - gradient_accumulate_every: int = None, + gradient_accumulate_every: int = 2, batch_size: int = None, seq_len: int = None, - entity_name: str = None, + entity_name: str = "zeta", model=None, use_fsdp: bool = False, use_activation_checkpointing: bool = False, - learning_rate=None, - seed=None, + learning_rate: float = None, + seed: int = None, use_pretokenized: bool = False, - resume_from_checkpoint=None, + resume_from_checkpoint: bool = None, checkpointing_steps=None, - output_dir=None, - weight_decay=None, + output_dir: str = "checlpoints/", + optimizer_type: str = "Adam8bit", + weight_decay: float = 0.1, use_deepspeed=None, + *args, + **kwargs, ): + """Trainer + + Args: + gradient_accumulate_every (int, optional): _description_. Defaults to None. + batch_size (int, optional): _description_. Defaults to None. + seq_len (int, optional): _description_. Defaults to None. + entity_name (str, optional): _description_. Defaults to None. + model (_type_, optional): _description_. Defaults to None. + use_fsdp (bool, optional): _description_. Defaults to False. + use_activation_checkpointing (bool, optional): _description_. Defaults to False. + learning_rate (_type_, optional): _description_. Defaults to None. + seed (_type_, optional): _description_. Defaults to None. + use_pretokenized (bool, optional): _description_. Defaults to False. + resume_from_checkpoint (_type_, optional): _description_. Defaults to None. + checkpointing_steps (_type_, optional): _description_. Defaults to None. + output_dir (_type_, optional): _description_. Defaults to None. + weight_decay (_type_, optional): _description_. Defaults to None. + use_deepspeed (_type_, optional): _description_. Defaults to None. 
+ + Examples: + >>> Trainer( + >>> gradient_accumulate_every=gradient_accumulate_every, + >>> batch_size=batch_size, + >>> seq_len=seq_len, + >>> entity_name=entity_name, + >>> model=model, + >>> use_fsdp=use_fsdp, + >>> use_activation_checkpointing=use_activation_checkpointing, + >>> learning_rate=learning_rate, + >>> seed=seed, + >>> use_pretokenized=use_pretokenized, + >>> resume_from_checkpoint=resume_from_checkpoint, + >>> checkpointing_steps=checkpointing_steps, + >>> output_dir=output_dir, + >>> weight_decay=weight_decay, + >>> use_deepspeed=use_deepspeed, + >>> ) + + """ # accelerator timeout = InitProcessGroupKwargs(timeout=timedelta(seconds=1_000_000)) @@ -52,7 +95,7 @@ def Trainer( # AcceleratorState().deepspeed_plugin.deepspeed_config['train_micro_batch_ accelerator.init_trackers( - project_name="LongNet", + project_name=entity_name, config={ "batch_size": batch_size, "gradient_accumulate_every": gradient_accumulate_every, @@ -101,7 +144,7 @@ def Trainer( weight_decay=weight_decay, beta_1=0.90, beta_2=0.95, - optimizer_type="Adam8bit", + optimizer_type=optimizer_type, use_fsdp=True, accelerator=accelerator, ) @@ -155,14 +198,17 @@ def Trainer( if resume_from_checkpoint: if resume_from_checkpoint is not None or resume_from_checkpoint != "": - accelerator.print(f"Resuming from checkpoint {resume_from_checkpoint}") + accelerator.print( + f"Resuming from checkpoint {resume_from_checkpoint}" + ) accelerator.load_state(resume_from_checkpoint) path = os.path.basename(resume_from_checkpoint) training_difference = os.path.splitext(path)[0] # need to multiply `gradient_accumulation_steps` to reflect real steps resume_step = ( - int(training_difference.replace("step_", "")) * gradient_accumulate_every + int(training_difference.replace("step_", "")) + * gradient_accumulate_every ) if resume_from_checkpoint and resume_step is not None: @@ -204,32 +250,38 @@ def Trainer( # end training - # accelerator.print(f"Training Finished") + accelerator.print("Training Finished") accelerator.end_training() # save final model - # accelerator.print(f"Saving model to {output_dir}") + accelerator.print(f"Saving model to {output_dir}") if output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) with accelerator.main_process_first(): accelerator.save( - unwrapped_model.state_dict(), f"{output_dir}/final/final_model.pt" + unwrapped_model.state_dict(), + f"{output_dir}/final/final_model.pt", ) -def train(MASTER_ADDR=None, MASTER_PORT=None, RANK=None, WORLD_SIZE=None): +def train( + MASTER_ADDR=None, + MASTER_PORT=None, + RANK=None, + WORLD_SIZE=None, + *args, + **kwargs, +): os.environ["MASTER_ADDR"] or MASTER_ADDR # = 'localhost' os.environ["MASTER_PORT"] or MASTER_PORT # = '9994' # # [CRITICAL] Pay attention to this when scaling to multiple GPUs and clusters - # # Pay attention to this, use "accelerate config" - os.environ["RANK"] or RANK # = str(0) # Number of nodes (servers) os.environ["WORLD_SIZE"] or WORLD_SIZE # = str(torch.cuda.device_count()) torch.distributed.init_process_group() - Trainer() + Trainer(*args, **kwargs) diff --git a/zeta/utils/__init__.py b/zeta/utils/__init__.py index eeb1daf6..4ef4ff67 100644 --- a/zeta/utils/__init__.py +++ b/zeta/utils/__init__.py @@ -1,3 +1,98 @@ -# Copyright (c) 2022 Agora -# Licensed under The MIT License [see LICENSE for details] -from zeta.utils.main import * +from zeta.utils.cuda_memory_wrapper import track_cuda_memory_usage + +from zeta.utils.benchmark import ( + benchmark, + print_cuda_memory_usage, + 
save_memory_snapshot, +) +from zeta.utils.disable_logging import disable_warnings_and_logs +from zeta.utils.params import print_num_params, print_main +from zeta.utils.module_device import module_device +from zeta.utils.save_load_wrapper import save_load +from zeta.utils.main import ( + exists, + default, + once, + eval_decorator, + cast_tuple, + maybe, + init_zero_, + pick_and_pop, + group_dict_by_key, + string_begins_with, + group_by_key_prefix, + top_p, + top_k, + top_a, + log, + gumbel_noise, + video_tensor_to_gift, + gif_to_tensor, + l2norm, + pad_at_dim, + cosine_beta_schedule, + cast_if_src_dtype, + get_sinusoid_encoding_table, + interpolate_pos_encoding_2d, + seek_all_images, +) + +from zeta.utils.enforce_types import enforce_types +from zeta.utils.cuda_wrapper import ( + get_cuda_bare_metal_version, + check_cuda_torch_binary_vs_bare_metal, + raise_if_cuda_home_none, + append_nvcc_threads, + check_cuda, +) +from zeta.utils.verbose_execution import VerboseExecution +from zeta.utils.log_pytorch_op import log_torch_op +from zeta.utils.img_to_tensor import img_to_tensor +from zeta.utils.text_to_tensor import text_to_tensor + +__all__ = [ + "track_cuda_memory_usage", + "benchmark", + "print_cuda_memory_usage", + "save_memory_snapshot", + "disable_warnings_and_logs", + "print_main", + "module_device", + "save_load", + "exists", + "default", + "once", + "eval_decorator", + "cast_tuple", + "maybe", + "init_zero_", + "pick_and_pop", + "group_dict_by_key", + "string_begins_with", + "group_by_key_prefix", + "top_p", + "top_k", + "top_a", + "log", + "gumbel_noise", + "print_num_params", + "video_tensor_to_gift", + "gif_to_tensor", + "l2norm", + "pad_at_dim", + "cosine_beta_schedule", + "cast_if_src_dtype", + "get_sinusoid_encoding_table", + "interpolate_pos_encoding_2d", + "enforce_types", + "get_cuda_bare_metal_version", + "check_cuda_torch_binary_vs_bare_metal", + "raise_if_cuda_home_none", + "append_nvcc_threads", + "check_cuda", + "VerboseExecution", + "seek_all_images", + "log_torch_op", + "img_to_tensor", + "text_to_tensor", +] diff --git a/zeta/utils/benchmark.py b/zeta/utils/benchmark.py new file mode 100644 index 00000000..a2e2728e --- /dev/null +++ b/zeta/utils/benchmark.py @@ -0,0 +1,117 @@ +import random +from contextlib import contextmanager, nullcontext +from dataclasses import dataclass, field +from pathlib import Path +from pickle import dump +from typing import Callable, Optional + +import torch +import torch.utils.benchmark as benchmark +from torch.cuda._memory_viz import profile_plot +from torch.profiler import ProfilerActivity, profile, record_function + + +@dataclass +class ProfileConfig: + file_path: Optional[str] = None + name: Optional[str] = None + cuda: bool = True + iters: int = 0 + warmup_iters: int = 0 + sync: bool = False + extra_kwargs: dict = field(default_factory=dict) + memory_profile_path: Optional[str] = None + + +def benchmark_torch_function_in_microseconds( + func: Callable, *args, **kwargs +) -> float: + # warmup + for _ in range(5): + func(*args, **kwargs) + t0 = benchmark.Timer( + stmt="func(*args, **kwargs)", + globals={"args": args, "kwargs": kwargs, "func": func}, + ) + return t0.blocked_autorange().median * 1e6 + + +def profile_function( + config: ProfileConfig, func: Callable, *args, **kwargs +) -> torch.profiler.profile: + """Profile a torch function and save the result to a file""" + seed = 123 + random.seed(seed) + torch.manual_seed(seed) + + activities = [ProfilerActivity.CPU] + if config.cuda: + activities.append(ProfilerActivity.CUDA) + + if 
config.warmup_iters >= 0: + for _ in range(config.warmup_iters): + func(*args, **kwargs) + if config.sync: + torch.cuda.synchronize() + name_context = ( + nullcontext() if config.name is None else record_function(config.name) + ) + profile_memory = config.memory_profile_path is not None + with profile( + activities=activities, + profile_memory=profile_memory, + record_shapes=profile_memory, + with_stack=profile_memory, + **config.extra_kwargs, + ) as prof: + for _ in range(config.iters): + with name_context: + func(*args, **kwargs) + if config.sync: + torch.cuda.synchronize() + + if config.file_path is not None: + prof.export_chrome_trace(config.file_path) + + if profile_memory: + with open(config.memory_profile_path, "w") as f: + f.write(profile_plot(prof)) + + if config.file_path is None: + print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10)) + + return prof + + +@contextmanager +def print_cuda_memory_usage(): + initial_memory = torch.cuda.memory_allocated() + try: + yield + finally: + memory_usage = torch.cuda.memory_allocated() - initial_memory + memory_usage_gb = memory_usage / (1024**3) + print(f"CUDA memory usage: {memory_usage_gb:.2f} GB") + + +@contextmanager +def save_memory_snapshot(file_path: Path): + """Save a memory snapshot information to a folder + Usage: + with save_memory_snapshot(file_path): + # code to profile + + Args: + file_path: The path to the folder to save the snapshot to + will create the folder if it doesn't exist + """ + file_path.mkdir(parents=True, exist_ok=True) + torch.cuda.memory._record_memory_history() + try: + yield + finally: + s = torch.cuda.memory._snapshot() + with open(f"{file_path}/snapshot.pickle", "wb") as f: + dump(s, f) + with open(f"{file_path}/trace_plot.html", "w") as f: + f.write(torch.cuda._memory_viz.trace_plot(s)) diff --git a/zeta/utils/cuda_memory_wrapper.py b/zeta/utils/cuda_memory_wrapper.py new file mode 100644 index 00000000..f15e62c0 --- /dev/null +++ b/zeta/utils/cuda_memory_wrapper.py @@ -0,0 +1,55 @@ +import functools +import logging + +import torch + +# Logging initialization +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + + +# Main function +def track_cuda_memory_usage(func): + """Track CUDA memory usage of a function. + + Args: + func (function): The function to be tracked. + + Returns: + function: The wrapped function. 
+ + Example: + >>> @track_cuda_memory_usage + >>> def train(): + >>> pass + >>> train() + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not torch.cuda.is_available(): + logging.warning("CUDA is not available, skip tracking memory usage") + return func(*args, **kwargs) + + torch.cuda.synchronize() + before_memory = torch.cuda.memory_allocated() + + try: + result = func(*args, **kwargs) + except Exception as error: + logging.error(f"Error occurs when running {func.__name__}: {error}") + raise + + finally: + torch.cuda.synchronize() + after_memory = torch.cuda.memory_allocated() + memory_diff = after_memory - before_memory + logging.info( + f"Memory usage of {func.__name__}: {memory_diff} bytes" + ) + + return result + + return wrapper diff --git a/zeta/utils/cuda_wrapper.py b/zeta/utils/cuda_wrapper.py new file mode 100644 index 00000000..dcdda696 --- /dev/null +++ b/zeta/utils/cuda_wrapper.py @@ -0,0 +1,171 @@ +import os +import subprocess + +import torch + +# from setuptools import setup +from torch.utils.cpp_extension import ( + CUDA_HOME, +) # , BuildExtension, CUDAExtension + +# ninja build does not work unless include_dirs are abs path +this_dir = os.path.dirname(os.path.abspath(__file__)) + + +def get_cuda_bare_metal_version(cuda_dir: str): + """ + Retrieves the bare metal version of CUDA installed in the specified directory. + + Args: + cuda_dir (str): The directory where CUDA is installed. + + Returns: + tuple: A tuple containing the raw output of the command, the major version of the bare metal CUDA, and the minor version of the bare metal CUDA. + """ + raw_output = subprocess.check_output( + [cuda_dir + "/bin/nvcc", "-V"], text=True + ) + output = raw_output.split() + release_idx = output.index("release") + 1 + release = output[release_idx].split(".") + bare_metal_major = release[0] + bare_metal_minor = release[1][0] + + return raw_output, bare_metal_major, bare_metal_minor + + +def check_cuda_torch_binary_vs_bare_metal(cuda_dir: str): + """ + Compares the version of CUDA used to compile PyTorch binaries with the version + of CUDA used to compile CUDA extensions. Raises a RuntimeError if there is a + version mismatch. + + Args: + cuda_dir (str): The directory path where CUDA is installed. + + Raises: + RuntimeError: If the version of CUDA used to compile CUDA extensions does + not match the version used to compile PyTorch binaries. + + Returns: + None + """ + ( + raw_output, + bare_metal_major, + bare_metal_minor, + ) = get_cuda_bare_metal_version(cuda_dir) + torch_binary_major = torch.version.cuda.split(".")[0] + torch_binary_minor = torch.version.cuda.split(".")[1] + + print("\nCompiling cuda extensions with") + print(raw_output + "from " + cuda_dir + "/bin\n") + + if (bare_metal_major != torch_binary_major) or ( + bare_metal_minor != torch_binary_minor + ): + raise RuntimeError( + "Cuda extensions are being compiled with a version of Cuda that" + " does not match the version used to compile Pytorch binaries. " + " Pytorch binaries were compiled with Cuda {}.\n".format( + torch.version.cuda + ) + + "In some cases, a minor-version mismatch will not cause later" + " errors: " + " https://github.com/NVIDIA/apex/pull/323#discussion_r287021798. " + " You can try commenting out this check (at your own risk)." + ) + + +def raise_if_cuda_home_none(global_option: str) -> None: + if CUDA_HOME is not None: + return + raise RuntimeError( + f"{global_option} was requested, but nvcc was not found. Are you sure" + " your environment has nvcc available? 
If you're installing within a" + " container from https://hub.docker.com/r/pytorch/pytorch, only images" + " whose names contain 'devel' will provide nvcc." + ) + + +def append_nvcc_threads(nvcc_extra_args): + _, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version( + CUDA_HOME + ) + if int(bare_metal_major) >= 11 and int(bare_metal_minor) >= 2: + return nvcc_extra_args + ["--threads", "4"] + return nvcc_extra_args + + +def check_cuda(): + if not torch.cuda.is_available(): + # https://github.com/NVIDIA/apex/issues/486 + # Extension builds after https://github.com/pytorch/pytorch/pull/23408 attempt to query torch.cuda.get_device_capability(), + # which will fail if you are compiling in an environment without visible GPUs (e.g. during an nvidia-docker build command). + print( + "\nWarning: Torch did not find available GPUs on this system.\n", + ( + "If your intention is to cross-compile, this is not an" + " error.\nBy default, Apex will cross-compile for Pascal" + " (compute capabilities 6.0, 6.1, 6.2),\nVolta (compute" + " capability 7.0), Turing (compute capability 7.5),\nand, if" + " the CUDA version is >= 11.0, Ampere (compute capability" + " 8.0).\nIf you wish to cross-compile for a single specific" + ' architecture,\nexport TORCH_CUDA_ARCH_LIST="compute' + ' capability" before running setup.py.\n' + ), + ) + if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None: + _, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version( + CUDA_HOME + ) + if int(bare_metal_major) == 11: + os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0" + if int(bare_metal_minor) > 0: + os.environ["TORCH_CUDA_ARCH_LIST"] = ( + "6.0;6.1;6.2;7.0;7.5;8.0;8.6" + ) + else: + os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5" + + +# print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__)) +# TORCH_MAJOR = int(torch.__version__.split(".")[0]) +# TORCH_MINOR = int(torch.__version__.split(".")[1]) + +# cmdclass = {} +# ext_modules = [] + +# raise_if_cuda_home_none("flashmm") +# # Check, if CUDA11 is installed for compute capability 8.0 +# cc_flag = [] +# # cc_flag.append("-gencode") +# # cc_flag.append("arch=compute_70,code=sm_70") +# cc_flag.append("-gencode") +# cc_flag.append("arch=compute_80,code=sm_80") + +# ext_modules.append( +# CUDAExtension( +# 'flashmm', [ +# 'flash_mm.cpp', +# 'mm_block_fwd_cuda.cu', +# 'hyena_filter_cuda.cu', +# ], +# extra_compile_args={'cxx': ['-g', '-march=native', '-funroll-loops'], +# 'nvcc': ['-O3', '--threads', '4', '-lineinfo', '--use_fast_math', '-std=c++17', '-arch=compute_70'] +# # extra_compile_args={'cxx': ['-O3'], +# # 'nvcc': append_nvcc_threads(['-O3', '-lineinfo', '--use_fast_math', '-std=c++17'] + cc_flag) +# }, +# include_dirs=[os.path.join(this_dir, 'mathdx/22.02/include')], +# ) +# ) + +# torch.utils.cpp_extension.COMMON_NVCC_FLAGS.remove('-D__CUDA_NO_HALF2_OPERATORS__') + +# setup( +# name="flashmm", +# version="0.1", +# description="Fast modules for Monarch Mixer block", +# ext_modules=ext_modules, +# cmdclass={"build_ext": BuildExtension} if ext_modules else {}, +# ) diff --git a/zeta/utils/disable_logging.py b/zeta/utils/disable_logging.py new file mode 100644 index 00000000..f8401ea8 --- /dev/null +++ b/zeta/utils/disable_logging.py @@ -0,0 +1,42 @@ +import os +import warnings +import logging + +# Immediately suppress warnings +warnings.filterwarnings("ignore") + +# Set environment variables to minimize logging before importing any modules +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # Suppress TensorFlow logs + +# Force 
NumExpr to use minimal threads to reduce its logging output +os.environ["NUMEXPR_MAX_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" + + +def disable_warnings_and_logs(): + # Attempt to reduce TensorFlow verbosity if installed + try: + import tensorflow as tf + + tf.get_logger().setLevel(logging.ERROR) + tf.autograph.set_verbosity(3) + except ImportError: + pass + + # Reduce logging for known verbose libraries + logging.getLogger().setLevel( + logging.CRITICAL + ) # Suppress most logs globally + + # Suppress specific verbose loggers known to output unwanted messages + for logger_name in ["transformers", "torch", "tensorflow", "numexpr"]: + logging.getLogger(logger_name).setLevel(logging.CRITICAL) + + # Specifically target the NumExpr logger if it's being stubborn + logging.getLogger("numexpr").setLevel(logging.CRITICAL) + + +# Run the suppression function at the start +disable_warnings_and_logs() + +# Ensure to place any of your script's import statements here, after the call to disable_warnings_and_logs() diff --git a/zeta/utils/enforce_types.py b/zeta/utils/enforce_types.py new file mode 100644 index 00000000..58ffdde5 --- /dev/null +++ b/zeta/utils/enforce_types.py @@ -0,0 +1,40 @@ +from functools import wraps +from typing import Callable + + +def enforce_types(func: Callable) -> Callable: + """ + A decorator to enforce type checks on the input parameters of a function based on its annotations. + + If a parameter doesn't have a type annotation, it can be of any type. + + Args: + func (Callable): The function whose parameters are to be checked. + + Returns: + Callable: The wrapped function with type checks. + + Examples: + @enforce_types + def add(a: int, b: int) -> int: + return a + b + + add(1, 2) # This is fine + add('1', '2') # This raises a TypeError + """ + + @wraps(func) + def wrapper(*args, **kwargs): + arg_names = func.__code__.co_varnames[: func.__code__.co_argcount] + arg_types = func.__annotations__ + + for name, value in list(zip(arg_names, args)) + list(kwargs.items()): + if name in arg_types and not isinstance(value, arg_types[name]): + raise TypeError( + f"Argument '{name}' is not of type" + f" '{arg_types[name].__name__}'" + ) + + return func(*args, **kwargs) + + return wrapper diff --git a/zeta/utils/img_to_tensor.py b/zeta/utils/img_to_tensor.py new file mode 100644 index 00000000..3315cef3 --- /dev/null +++ b/zeta/utils/img_to_tensor.py @@ -0,0 +1,40 @@ +from PIL import Image +from torchvision import transforms + + +def img_to_tensor(img: str = "pali.png", img_size: int = 256): + """ + Convert an image to a tensor. + + Args: + img (str): The path to the image file. Default is "pali.png". + img_size (int): The desired size of the image. Default is 256. + + Returns: + torch.Tensor: The image converted to a tensor. 
+ + """ + # Load image + image = Image.open(img) + + # Define a transforms to convert the image to a tensor and apply preprocessing + transform = transforms.Compose( + [ + transforms.Lambda(lambda image: image.convert("RGB")), + transforms.Resize( + (img_size, img_size) + ), # Resize the image to 256x256 + transforms.ToTensor(), # Convert the image to a tensor, + transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), # Normalize the pixel values + ] + ) + + # apply transforms to the image + x = transform(image) + + # Add batch dimension + x = x.unsqueeze(0) + + return x diff --git a/zeta/utils/log_pytorch_op.py b/zeta/utils/log_pytorch_op.py new file mode 100644 index 00000000..52dd560c --- /dev/null +++ b/zeta/utils/log_pytorch_op.py @@ -0,0 +1,88 @@ +import functools + +from loguru import logger +import time +import sys + + +# Configure loguru logger with advanced settings +logger.remove() +logger.add( + sys.stderr, + colorize=True, + format=" {time} {message} ", + backtrace=True, + diagnose=True, + enqueue=True, + catch=True, +) + + +def log_torch_op( + log_level: str = "DEBUG", + log_input_output: bool = True, + add_trace: bool = True, + log_execution_time: bool = True, + handle_exceptions: bool = True, +): + """ + Decorator function that logs the details of a function call, including input arguments, output result, + and execution time. It can also handle exceptions and add stack traces to the logs. + + Args: + log_level (str, optional): The log level to use. Defaults to "DEBUG". + log_input_output (bool, optional): Whether to log the input arguments and output result. Defaults to True. + add_trace (bool, optional): Whether to add stack traces to the logs when an exception occurs. Defaults to True. + log_execution_time (bool, optional): Whether to log the execution time of the function. Defaults to True. + handle_exceptions (bool, optional): Whether to handle exceptions and log them. Defaults to True. + + Returns: + function: The decorated function. + """ + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if log_execution_time: + start_time = time.time() + + # Log function call details + if log_input_output: + args_repr = [repr(a) for a in args] + kwargs_repr = [f"{k}={v!r}" for k, v in kwargs.items()] + signature = ", ".join(args_repr + kwargs_repr) + logger.log( + log_level, f"Calling {func.__name__} with args: {signature}" + ) + + try: + result = func(*args, **kwargs) + if log_input_output: + logger.log( + log_level, f"{func.__name__} returned {result!r}" + ) + except Exception as e: + if handle_exceptions: + if add_trace: + logger.exception(f"Exception in {func.__name__}: {e}") + else: + logger.log( + log_level, f"Exception in {func.__name__}: {e}" + ) + raise # Ensure the exception is propagated + finally: + if log_execution_time: + end_time = time.time() + logger.log( + log_level, + ( + f"{func.__name__} executed in" + f" {end_time - start_time:.4f}s" + ), + ) + + return result + + return wrapper + + return decorator diff --git a/zeta/utils/main.py b/zeta/utils/main.py index 6172a2b2..9b5bc791 100644 --- a/zeta/utils/main.py +++ b/zeta/utils/main.py @@ -5,8 +5,8 @@ import einops import numpy as np import torch -import torch.functional as F import torch.nn as nn +import torch.nn.functional as F from accelerate import Accelerator from einops import rearrange from PIL import Image @@ -217,7 +217,7 @@ def pick_and_pop(keys, d): Returns: dict: A dictionary with the specified keys and their values. 
""" - values = list(map(lambda key: d.pop(key), keys)) + values = list(map(d.pop, keys)) return dict(zip(keys, values)) @@ -232,7 +232,7 @@ def group_dict_by_key(cond, d): Returns: tuple: Two dictionaries split based on the condition. """ - return_val = [dict(), dict()] + return_val = [{}, {}] for key in d.keys(): match = bool(cond(key)) ind = int(not match) @@ -283,7 +283,10 @@ def groupby_prefix_and_trim(prefix, d): partial(string_begins_with, prefix), d ) kwargs_without_prefix = dict( - map(lambda x: (x[0][len(prefix) :], x[1]), tuple(kwargs_with_prefix.items())) + map( + lambda x: (x[0][len(prefix) :], x[1]), + tuple(kwargs_with_prefix.items()), + ) ) return kwargs_without_prefix, kwargs @@ -316,7 +319,7 @@ def top_k(logits, thres=0.9): def top_a(logits, min_p_pow=2.0, min_p_ratio=0.02): - probs = F.softmax(logits, dim=-1) + probs = nn.Softmax(logits, dim=-1) limit = torch.pow(torch.max(probs), min_p_pow) * min_p_ratio logits[probs < limit] = float("-inf") @@ -339,7 +342,7 @@ def gumnel_sample(t, temperature=1.0, dim=-1): class ContrastiveTopK(nn.Module): def __init__(self, alpha, k): - super(ContrastiveTopK, self).__init__() + super().__init__() self.alpha = alpha self.k = k @@ -367,7 +370,9 @@ def forward(self, logits_exp, logits_ama): # scores scores = torch.where( - mask.bool(), torch.log(p_exp / (p_ama + 1e-8)), torch.tensor(-float("inf")) + mask.bool(), + torch.log(p_exp / (p_ama + 1e-8)), + torch.tensor(-float("inf")), ) return scores @@ -411,7 +416,9 @@ def __init__(self, dim, dim_out, *, time_emb_dim=None, groups=8): self.block1 = Block(dim, dim_out, groups=groups) self.block2 = Block(dim_out, dim_out, groups=groups) - self.res_conv = nn.Conv3d(dim, dim_out, 1) if dim != dim_out else nn.Identity() + self.res_conv = ( + nn.Conv3d(dim, dim_out, 1) if dim != dim_out else nn.Identity() + ) def forward(self, x, time_emb=None): scale_shift = None @@ -429,7 +436,9 @@ def forward(self, x, time_emb=None): def load_model(path): with open(path, "rb") as f: - return torch.load(f, map_location=torch.device("cpu")) + return torch.load( + f, map_location=torch.device("cpu"), weights_only=True + ) CHANNELS_TO_MODE = {1: "L", 3: "RGB", 4: "RGBA"} @@ -451,7 +460,7 @@ def seek_all_images(img, channels=3): # tensor of shape (channels, frames, height, width) -> GIF def video_tensor_to_gift(tensor, path, duration=120, loop=0, optimize=True): - images = map(T.ToPilImage(), tensor.unbind(dim=1)) + images = map(T.ToPILImage(), tensor.unbind(dim=1)) first_img, *rest_imgs = images first_img.save( path, @@ -495,8 +504,8 @@ def cast_num_frames(t, *, frames): return F.pad(t, (0, 0, 0, 0, 0, frames - f)) -def max_neg_values(tensor): - return -torch.info(tensor.dtype).max +def max_neg_values(t): + return t * -1e5 def l2norm(t, groups=1): @@ -577,7 +586,9 @@ def forward(self, x, **kwargs): def cosine_beta_schedule(timesteps, s=0.008): steps = timesteps + 1 x = torch.linspace(0, timesteps, steps, dtype=torch.float64) - alphas_cumprod = torch.cos(((x / timesteps) + s) / (1 + s) * torch.pi * 0.5) ** 2 + alphas_cumprod = ( + torch.cos(((x / timesteps) + s) / (1 + s) * torch.pi * 0.5) ** 2 + ) alphas_cumprod = alphas_cumprod / alphas_cumprod[0] betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1]) return torch.clip(betas, 0, 0.9999) @@ -615,7 +626,8 @@ def forward(self, x): def extra_repr(self): st = ( - f"logit_scale_init={self.logit_scale_init}, learnable={self.learnable}," + f"logit_scale_init={self.logit_scale_init}," + f" learnable={self.learnable}," f"max_logit_scale={self.max_logit_scale}" ) return st @@ 
-686,7 +698,9 @@ def interpolate_pos_encoding_2d(target_spatial_size, pos_embed): if N == target_spatial_size: return pos_embed dim = pos_embed.shape[-1] - pos_embed, updated = cast_if_src_dtype(pos_embed, torch.bfloat16, torch.float32) + pos_embed, updated = cast_if_src_dtype( + pos_embed, torch.bfloat16, torch.float32 + ) pos_embed = nn.functional.interpolate( pos_embed.reshape(1, int(math.sqrt(N)), int(math.sqrt(N)), dim).permute( 0, 3, 1, 2 @@ -695,14 +709,15 @@ def interpolate_pos_encoding_2d(target_spatial_size, pos_embed): mode="bicubic", ) if updated: - pos_embed, _ = cast_if_src_dtype(pos_embed, torch.float32, torch.bfloat16) + pos_embed, _ = cast_if_src_dtype( + pos_embed, torch.float32, torch.bfloat16 + ) pos_embed = pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) return pos_embed ############# - # def init_bert_params(module): # def normal_(data): # data.copy_(data.cpu().normal_(mean=0.0, std=0.02).to(data.device)) @@ -746,7 +761,8 @@ def look_around(x, backward=1, forward=0, pad_value=-1, dim=2): padded_x = F.pad(x, (*dims, backward, forward), value=pad_value) tensors = [ - padded_x[:, ind : (ind + t), ...] for ind in range(forward + backward + 1) + padded_x[:, ind : (ind + t), ...] + for ind in range(forward + backward + 1) ] return torch.cat(tensors, dim=dim) @@ -764,7 +780,3 @@ def all_unique(arr): def apply_fns(fns, tensors): return [fn(tensors) for fn, tensor in zip(fns, tensors)] - - -def cast_tuple(t, length=1): - return t if isinstance(t, tuple) else ((t,) * length) diff --git a/zeta/utils/module_device.py b/zeta/utils/module_device.py new file mode 100644 index 00000000..4ee08881 --- /dev/null +++ b/zeta/utils/module_device.py @@ -0,0 +1,59 @@ +import torch +from torch.nn import Module + + +def module_device( + device_property_name: str = "device", + on_device_transfer=None, + compatibility_check: bool = False, +): + """Module device decorator. + + Args: + device_property_name (str, optional): _description_. Defaults to "device". + on_device_transfer (_type_, optional): _description_. Defaults to None. + compatibility_check (bool, optional): _description_. Defaults to False. + """ + + def decorator(klass): + assert issubclass( + klass, Module + ), "should decorate a subclass of torch.nn.Module" + + _orig_init = klass.__init__ + _orig_to = klass.to + + def __init__(self, *args, **kwargs): + _orig_init(self, *args, **kwargs) + self.register_buffer("_dummy", torch.tensor(0), persistent=False) + + def __to(self, device, *args, **kwargs): + if ( + compatibility_check + and not torch.cuda.is_available() + and "cuda" in str(device) + ): + raise RuntimeError( + "CUDA is not available for this device transfer." + ) + result = _orig_to(self, device, *args, **kwargs) + if on_device_transfer: + on_device_transfer(self, device) + return result + + @property + def _device_property(self): + devices = {p.device for p in self.parameters()} | { + b.device for b in self.buffers() + } + if len(devices) > 1: + return devices + return self._dummy.device + + klass.__init__ = __init__ + klass.to = __to + setattr(klass, device_property_name, _device_property) + + return klass + + return decorator diff --git a/zeta/utils/params.py b/zeta/utils/params.py new file mode 100644 index 00000000..4a437e7e --- /dev/null +++ b/zeta/utils/params.py @@ -0,0 +1,29 @@ +import torch.distributed as dist # Add this line + + +def print_num_params(model): + """Print the number of parameters in a model. 
+
+    Args:
+        model (torch.nn.Module): The model whose trainable parameters are counted.
+    """
+    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+
+    if dist.is_available() and dist.is_initialized():
+        if dist.get_rank() == 0:
+            print(f"Number of parameters in model: {n_params}")
+    else:
+        print(f"Number of parameters in model: {n_params}")
+
+
+def print_main(msg):
+    """Print the message only on the main process.
+
+    Args:
+        msg (str): The message to print.
+    """
+    if dist.is_available() and dist.is_initialized():
+        if dist.get_rank() == 0:
+            print(msg)
+    else:
+        print(msg)
diff --git a/zeta/utils/save_load_wrapper.py b/zeta/utils/save_load_wrapper.py
new file mode 100644
index 00000000..44b13654
--- /dev/null
+++ b/zeta/utils/save_load_wrapper.py
@@ -0,0 +1,113 @@
+import pickle
+from pathlib import Path
+
+import torch
+from beartype import beartype
+from beartype.typing import Callable, Optional
+from torch.nn import Module
+
+
+# helpers
+def exists(v):
+    return v is not None
+
+
+@beartype
+def save_load(
+    save_method_name="save",
+    load_method_name="load",
+    config_instance_var_name="_config",
+    init_and_load_classmethod_name="init_and_load",
+    version: Optional[str] = None,
+    pre_save_hook: Optional[Callable[[Module], None]] = None,
+    post_load_hook: Optional[Callable[[Module], None]] = None,
+    compress: Optional[bool] = False,
+    partial_load: Optional[bool] = False,
+    *args,
+    **kwargs,
+):
+    """Base decorator for save and load methods for torch.nn.Module subclasses.
+
+    Args:
+        save_method_name (str, optional): Name of the save method attached to the class. Defaults to "save".
+        load_method_name (str, optional): Name of the load method attached to the class. Defaults to "load".
+        config_instance_var_name (str, optional): Instance attribute that stores the pickled init arguments. Defaults to "_config".
+        init_and_load_classmethod_name (str, optional): Name of the classmethod that instantiates and loads from a checkpoint. Defaults to "init_and_load".
+        version (Optional[str], optional): Version string saved with the checkpoint and checked on load. Defaults to None.
+        pre_save_hook (Optional[Callable[[Module], None]], optional): Hook called on the module before saving. Defaults to None.
+        post_load_hook (Optional[Callable[[Module], None]], optional): Hook called on the module after loading. Defaults to None.
+        compress (Optional[bool], optional): Whether to save with the new zipfile serialization. Defaults to False.
+        partial_load (Optional[bool], optional): Whether to merge the checkpoint into the current state dict instead of replacing it. Defaults to False.
+    """
+
+    def _save_load(klass):
+        assert issubclass(
+            klass, Module
+        ), "save_load should decorate a subclass of torch.nn.Module"
+
+        _orig_init = klass.__init__
+
+        def __init__(self, *args, **kwargs):
+            _config = pickle.dumps((args, kwargs))
+            setattr(self, config_instance_var_name, _config)
+            _orig_init(self, *args, **kwargs)
+
+        def _save(self, path, overwrite=True):
+            if pre_save_hook:
+                pre_save_hook(self)
+
+            path = Path(path)
+            assert overwrite or not path.exists()
+            pkg = dict(
+                model=self.state_dict(),
+                config=getattr(self, config_instance_var_name),
+                version=version,
+            )
+            torch.save(pkg, str(path), _use_new_zipfile_serialization=compress)
+
+        def _load(self, path, strict=True):
+            path = Path(path)
+            assert path.exists()
+            pkg = torch.load(str(path), map_location="cpu", weights_only=True)
+
+            if (
+                exists(version)
+                and exists(pkg["version"])
+                and pkg["version"] != version
+            ):
+                print(f'loading saved model at version {pkg["version"]}, expected {version}')
+
+            model_dict = self.state_dict()
+            if partial_load:
+                model_dict.update(pkg["model"])
+                self.load_state_dict(model_dict, strict=strict)
+            else:
+                self.load_state_dict(pkg["model"], strict=strict)
+
+            if post_load_hook:
+                post_load_hook(self)
+
+        @classmethod
+        def _init_and_load_from(cls, path, strict=True):
+            path = Path(path)
+            assert path.exists()
+            pkg = torch.load(str(path), map_location="cpu", weights_only=True)
+            assert (
+                "config" in pkg
+            ), "model configs were not found in this saved checkpoint"
+
+            config = pickle.loads(pkg["config"])
+            args, kwargs = config
+            model = cls(*args, **kwargs)
+
+            _load(model, path, strict=strict)
+            return model
+
+        klass.__init__ = __init__
+        setattr(klass, save_method_name, _save)
+        setattr(klass, load_method_name, _load)
+        setattr(klass, init_and_load_classmethod_name, _init_and_load_from)
+
+        return klass
+
+    return _save_load
diff --git a/zeta/utils/text_to_tensor.py b/zeta/utils/text_to_tensor.py
new file mode 100644
index 00000000..5f11495a
--- /dev/null
+++ b/zeta/utils/text_to_tensor.py
@@ -0,0 +1,31 @@
+from torch import nn
+
+
+def text_to_tensor(
+    text: str,
+    tokenizer: callable,
+    process_func: callable,
+    dim: int,
+    num_tokens: int,
+):
+    """
+    Converts a given text into a tensor representation.
+
+    Args:
+        text (str): The input text to be converted.
+        tokenizer (callable): A callable object that tokenizes the text.
+        process_func (callable): A callable object that processes the tokens.
+        dim (int): The dimension of the embedding.
+        num_tokens (int): The number of tokens in the vocabulary.
+
+    Returns:
+        out: The tensor representation of the input text.
+    """
+    tokens = tokenizer(text)
+
+    # Truncate or pad the tokens to the specified length
+    tokens = process_func(tokens)
+
+    # Embed the tokens with a randomly initialized embedding layer
+    out = nn.Embedding(num_tokens, dim)(tokens)
+    return out
diff --git a/zeta/utils/verbose_execution.py b/zeta/utils/verbose_execution.py
new file mode 100644
index 00000000..bdaffa3d
--- /dev/null
+++ b/zeta/utils/verbose_execution.py
@@ -0,0 +1,25 @@
+from torch import Tensor, nn
+
+
+class VerboseExecution(nn.Module):
+    """
+    A wrapper class that adds verbosity to the execution of a given model.
+
+    Args:
+        model (nn.Module): The model to be executed.
+    """
+
+    def __init__(self, model: nn.Module):
+        super().__init__()
+        self.model = model
+
+        for name, layer in self.model.named_children():
+            layer.__name__ = name
+            layer.register_forward_hook(
+                lambda layer, _, output: print(
+                    f"{layer.__name__} output: {output.shape}"
+                )
+            )
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self.model(x)
diff --git a/zeta/utils/vision_utils.py b/zeta/utils/vision_utils.py
index 13f93b6f..9b3e0b91 100644
--- a/zeta/utils/vision_utils.py
+++ b/zeta/utils/vision_utils.py
@@ -1,3 +1,7 @@
+"""Vision utilities for image preprocessing, etc."""
+
+# noqa: E501
+
 import base64
 import os
 from io import BytesIO
@@ -6,7 +10,6 @@
 import numpy as np
 import requests
 from packaging import version
-
 from transformers.utils import (
     ExplicitEnum,
     is_jax_tensor,
@@ -22,9 +25,9 @@
     import PIL.Image
     import PIL.ImageOps

-    if version.parse(version.parse(PIL.__version__).base_version) >= version.parse(
-        "9.1.0"
-    ):
+    if version.parse(
+        version.parse(PIL.__version__).base_version
+    ) >= version.parse("9.1.0"):
         PILImageResampling = PIL.Image.Resampling
     else:
         PILImageResampling = PIL.Image
@@ -33,7 +36,6 @@
 if is_torch_available():
     import torch

-
 ImageInput = Union[
     "PIL.Image.Image",
     np.ndarray,
@@ -121,13 +123,14 @@ def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
             images = [images]
         else:
             raise ValueError(
-                f"Invalid image shape. Expected either {expected_ndims + 1} or {expected_ndims} dimensions, but got"
+                f"Invalid image shape. Expected either {expected_ndims + 1} or"
+                f" {expected_ndims} dimensions, but got"
                 f" {images.ndim} dimensions."
             )
         return images

     raise ValueError(
-        "Invalid image type. Expected either PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or "
-        f"jax.ndarray, but got {type(images)}."
+        "Invalid image type. Expected either PIL.Image.Image, numpy.ndarray,"
+        f" torch.Tensor, tf.Tensor or jax.ndarray, but got {type(images)}."
     )


@@ -141,7 +144,8 @@ def to_numpy_array(img) -> np.ndarray:


 def infer_channel_dimension_format(
-    image: np.ndarray, num_channels: Optional[Union[int, Tuple[int, ...]]] = None
+    image: np.ndarray,
+    num_channels: Union[int, Tuple[int, ...], None] = None,
 ) -> ChannelDimension:
     """
     Infers the channel dimension format of `image`.
@@ -156,14 +160,18 @@ def infer_channel_dimension_format(
         The channel dimension of the image.
     """
     num_channels = num_channels if num_channels is not None else (1, 3)
-    num_channels = (num_channels,) if isinstance(num_channels, int) else num_channels
+    num_channels = (
+        (num_channels,) if isinstance(num_channels, int) else num_channels
+    )

     if image.ndim == 3:
         first_dim, last_dim = 0, 2
     elif image.ndim == 4:
         first_dim, last_dim = 1, 3
     else:
-        raise ValueError(f"Unsupported number of image dimensions: {image.ndim}")
+        raise ValueError(
+            f"Unsupported number of image dimensions: {image.ndim}"
+        )

     if image.shape[first_dim] in num_channels:
         return ChannelDimension.FIRST
@@ -173,7 +181,8 @@ def infer_channel_dimension_format(


 def get_channel_dimension_axis(
-    image: np.ndarray, input_data_format: Optional[Union[ChannelDimension, str]] = None
+    image: np.ndarray,
+    input_data_format: Union[ChannelDimension, str, None] = None,
 ) -> int:
     """
     Returns the channel dimension axis of the image.
@@ -223,7 +232,7 @@ def get_image_size( def is_valid_annotation_coco_detection( - annotation: Dict[str, Union[List, Tuple]] + annotation: Dict[str, Union[List, Tuple]], ) -> bool: if ( isinstance(annotation, dict) @@ -241,7 +250,7 @@ def is_valid_annotation_coco_detection( def is_valid_annotation_coco_panoptic( - annotation: Dict[str, Union[List, Tuple]] + annotation: Dict[str, Union[List, Tuple]], ) -> bool: if ( isinstance(annotation, dict) @@ -260,13 +269,13 @@ def is_valid_annotation_coco_panoptic( def valid_coco_detection_annotations( - annotations: Iterable[Dict[str, Union[List, Tuple]]] + annotations: Iterable[Dict[str, Union[List, Tuple]]], ) -> bool: return all(is_valid_annotation_coco_detection(ann) for ann in annotations) def valid_coco_panoptic_annotations( - annotations: Iterable[Dict[str, Union[List, Tuple]]] + annotations: Iterable[Dict[str, Union[List, Tuple]]], ) -> bool: return all(is_valid_annotation_coco_panoptic(ann) for ann in annotations) @@ -306,13 +315,16 @@ def load_image( image = PIL.Image.open(BytesIO(b64)) except Exception as e: raise ValueError( - f"Incorrect image source. Must be a valid URL starting with `http://` or `https://`, a valid path to an image file, or a base64 encoded string. Got {image}. Failed with {e}" + "Incorrect image source. Must be a valid URL starting with" + " `http://` or `https://`, a valid path to an image file," + f" or a base64 encoded string. Got {image}. Failed with {e}" ) elif isinstance(image, PIL.Image.Image): image = image else: raise ValueError( - "Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image." + "Incorrect format used for image. Should be an url linking to an" + " image, a base64 string, a local path, or a PIL image." ) image = PIL.ImageOps.exif_transpose(image) image = image.convert("RGB") @@ -326,12 +338,12 @@ class ImageFeatureExtractionMixin: """ def _ensure_format_supported(self, image): - if not isinstance(image, (PIL.Image.Image, np.ndarray)) and not is_torch_tensor( - image - ): + if not isinstance( + image, (PIL.Image.Image, np.ndarray) + ) and not is_torch_tensor(image): raise ValueError( - f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.array` and " - "`torch.Tensor` are." + f"Got type {type(image)} which is not supported, only" + " `PIL.Image.Image`, `np.array` and `torch.Tensor` are." ) def to_pil_image(self, image, rescale=None): @@ -378,7 +390,9 @@ def convert_rgb(self, image): return image.convert("RGB") - def rescale(self, image: np.ndarray, scale: Union[float, int]) -> np.ndarray: + def rescale( + self, image: np.ndarray, scale: Union[float, int] + ) -> np.ndarray: """ Rescale a numpy image by scale amount """ @@ -407,7 +421,11 @@ def to_numpy_array(self, image, rescale=None, channel_first=True): if is_torch_tensor(image): image = image.numpy() - rescale = isinstance(image.flat[0], np.integer) if rescale is None else rescale + rescale = ( + isinstance(image.flat[0], np.integer) + if rescale is None + else rescale + ) if rescale: image = self.rescale(image.astype(np.float32), 1 / 255.0) @@ -483,7 +501,9 @@ def normalize(self, image, mean, std, rescale=False): else: return (image - mean) / std - def resize(self, image, size, resample=None, default_to_square=True, max_size=None): + def resize( + self, image, size, resample=None, default_to_square=True, max_size=None + ): """ Resizes `image`. Enforces conversion of input to PIL.Image. 
@@ -513,7 +533,9 @@ def resize(self, image, size, resample=None, default_to_square=True, max_size=No Returns: image: A resized `PIL.Image.Image`. """ - resample = resample if resample is not None else PILImageResampling.BILINEAR + resample = ( + resample if resample is not None else PILImageResampling.BILINEAR + ) self._ensure_format_supported(image) @@ -525,11 +547,17 @@ def resize(self, image, size, resample=None, default_to_square=True, max_size=No if isinstance(size, int) or len(size) == 1: if default_to_square: - size = (size, size) if isinstance(size, int) else (size[0], size[0]) + size = ( + (size, size) + if isinstance(size, int) + else (size[0], size[0]) + ) else: width, height = image.size # specified size only for the smallest edge - short, long = (width, height) if width <= height else (height, width) + short, long = ( + (width, height) if width <= height else (height, width) + ) requested_new_short = size if isinstance(size, int) else size[0] if short == requested_new_short: @@ -542,8 +570,9 @@ def resize(self, image, size, resample=None, default_to_square=True, max_size=No if max_size is not None: if max_size <= requested_new_short: raise ValueError( - f"max_size = {max_size} must be strictly greater than the requested " - f"size for the smaller edge size = {size}" + f"max_size = {max_size} must be strictly greater" + " than the requested size for the smaller edge" + f" size = {size}" ) if new_long > max_size: new_short, new_long = ( @@ -552,7 +581,9 @@ def resize(self, image, size, resample=None, default_to_square=True, max_size=No ) size = ( - (new_short, new_long) if width <= height else (new_long, new_short) + (new_short, new_long) + if width <= height + else (new_long, new_short) ) return image.resize(size, resample=resample)
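Usage sketch for the new decorators introduced above (a minimal illustration, assuming `log_torch_op` and `save_load` are re-exported from `zeta.utils`; the `SimpleNet` class and the file name below are hypothetical and not part of this patch):

# Illustrative only: assumes zeta.utils re-exports log_torch_op and save_load.
import torch
from torch import nn

from zeta.utils import log_torch_op, save_load


@save_load()  # attaches .save()/.load() and an init_and_load classmethod
class SimpleNet(nn.Module):  # hypothetical example module
    def __init__(self, dim: int = 8):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

    @log_torch_op()  # logs call args, return value, and execution time
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)


model = SimpleNet(dim=8)
model(torch.randn(2, 8))                              # forward pass is logged via loguru
model.save("simple_net.pt")                           # saves state dict plus pickled init config
restored = SimpleNet.init_and_load("simple_net.pt")   # rebuilds the module and loads weights

Because `save_load` pickles the constructor arguments alongside the state dict, `init_and_load` can rebuild the module without re-specifying `dim`.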