From 20a622236f1e7f1f4d84be3e54c0bc91d3a8ce49 Mon Sep 17 00:00:00 2001 From: savitamittal1 <39776179+savitamittal1@users.noreply.github.com> Date: Fri, 29 Sep 2023 10:01:42 -0700 Subject: [PATCH] intel optimization (#2692) --- ...ain-hyperparameter-tune-with-sklearn.ipynb | 218 +++++++++++++++++- 1 file changed, 216 insertions(+), 2 deletions(-) diff --git a/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb b/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb index 1cf1d2e594..700d9fc0fb 100644 --- a/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb +++ b/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -26,6 +27,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -57,6 +59,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -78,6 +81,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -105,6 +109,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -115,6 +120,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -176,6 +182,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -204,10 +211,11 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Now, create the file in the dependencies directory." + "Now, create the file in the dependencies directory. You can also optionally install Intel® Extension for Scikit-Learn in your yaml file for additional performance on your Intel hardware. 
More details can be found at the end of this section." ] }, { @@ -234,6 +242,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -269,6 +278,81 @@ ] }, { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **[Optional] Install Intel® Extension for Scikit-Learn optimizations for more performance on Intel hardware**" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Want to speed up your scikit-learn scripts on Intel hardware? Try adding [Intel® Extension for Scikit-Learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) into your conda yaml file. We will show you how to enable these optimizations later in this example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "make_sklearnex_conda_file" + }, + "outputs": [], + "source": [ + "%%writefile {dependencies_dir}/conda.yaml\n", + "name: sklearn-env\n", + "channels:\n", + " - conda-forge\n", + "dependencies:\n", + " - python=3.8\n", + " - pip=21.2.4\n", + " - scikit-learn=0.24.2\n", + " - scikit-learn-intelex\n", + " - scipy=1.7.1\n", + " - pip: \n", + " - mlflow== 1.26.1\n", + " - azureml-mlflow==1.42.0\n", + " - mlflow-skinny==2.3.2" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The specification contains some usual packages that you'll use in your job (numpy, pip), along with Intel® Extension for Scikit-Learn.\n", + "\n", + "\n", + "Use the *yaml* file to create and register this custom environment in your workspace:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import Environment\n", + "\n", + "custom_env_name = \"sklearn-env\"\n", + "\n", + "job_env = Environment(\n", + " name=custom_env_name,\n", + " description=\"Custom environment for sklearn image classification\",\n", + " 
conda_file=os.path.join(dependencies_dir, \"conda.yaml\"),\n", + " image=\"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest\",\n", + ")\n", + "job_env = ml_client.environments.create_or_update(job_env)\n", + "\n", + "print(\n", + " f\"Environment with name {job_env.name} is registered to workspace, the environment version is {job_env.version}\"\n", + ")" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -302,10 +386,11 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Now, create the script file in the source directory." + "Now, create the script file in the source directory. If you want to use Intel® Extension for Scikit-Learn optimizations as part of this script, take a look at the alternative script file found at the end of this section." ] }, { @@ -405,6 +490,128 @@ ] }, { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **[Optional] Enable Intel® Extension for Scikit-Learn optimizations for more performance on Intel hardware**" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have installed Intel® Extension for Scikit-Learn (as demonstrated in the previous section), you can enable the performance optimizations by adding the two lines of code to the top of the script file, as shown below.\n", + "\n", + "To learn more about Intel® Extension for Scikit-Learn, visit the package's [documentation](https://intel.github.io/scikit-learn-intelex/)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "name": "create_sklearnex_script_file" + }, + "outputs": [], + "source": [ + "%%writefile {src_dir}/train_iris.py\n", + "# Modified from https://www.geeksforgeeks.org/multiclass-classification-using-scikit-learn/\n", + "\n", + "import argparse\n", + "import os\n", + "\n", + "# Import and enable Intel Extension for Scikit-learn optimizations\n", + "# where possible\n", + "\n", + "from sklearnex import patch_sklearn\n", + "patch_sklearn()\n", + "\n", + "# importing necessary libraries\n", + "import numpy as np\n", + "\n", + "\n", + "from sklearn import datasets\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import joblib\n", + "\n", + "import mlflow\n", + "import mlflow.sklearn\n", + "\n", + "def main():\n", + " parser = argparse.ArgumentParser()\n", + "\n", + " parser.add_argument('--kernel', type=str, default='linear',\n", + " help='Kernel type to be used in the algorithm')\n", + " parser.add_argument('--penalty', type=float, default=1.0,\n", + " help='Penalty parameter of the error term')\n", + "\n", + " # Start Logging\n", + " mlflow.start_run()\n", + "\n", + " # enable autologging\n", + " mlflow.sklearn.autolog()\n", + "\n", + " args = parser.parse_args()\n", + " mlflow.log_param('Kernel type', str(args.kernel))\n", + " mlflow.log_metric('Penalty', float(args.penalty))\n", + "\n", + " # loading the iris dataset\n", + " iris = datasets.load_iris()\n", + "\n", + " # X -> features, y -> label\n", + " X = iris.data\n", + " y = iris.target\n", + "\n", + " # dividing X, y into train and test data\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n", + "\n", + " # training a linear SVM classifier\n", + " from sklearn.svm import SVC\n", + " svm_model_linear = SVC(kernel=args.kernel, C=args.penalty)\n", + " svm_model_linear = svm_model_linear.fit(X_train, y_train)\n", + " 
svm_predictions = svm_model_linear.predict(X_test)\n", + "\n", + " # model accuracy for X_test\n", + " accuracy = svm_model_linear.score(X_test, y_test)\n", + " print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))\n", + " mlflow.log_metric('Accuracy', float(accuracy))\n", + " # creating a confusion matrix\n", + " cm = confusion_matrix(y_test, svm_predictions)\n", + " print(cm)\n", + "\n", + " registered_model_name=\"sklearn-iris-flower-classify-model\"\n", + "\n", + " ##########################\n", + " #\n", + " ##########################\n", + " # Registering the model to the workspace\n", + " print(\"Registering the model via MLFlow\")\n", + " mlflow.sklearn.log_model(\n", + " sk_model=svm_model_linear,\n", + " registered_model_name=registered_model_name,\n", + " artifact_path=registered_model_name\n", + " )\n", + "\n", + " # # Saving the model to a file\n", + " print(\"Saving the model via MLFlow\")\n", + " mlflow.sklearn.save_model(\n", + " sk_model=svm_model_linear,\n", + " path=os.path.join(registered_model_name, \"trained_model\"),\n", + " )\n", + " ###########################\n", + " #\n", + " ###########################\n", + " mlflow.end_run()\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -441,6 +648,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -463,6 +671,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -475,6 +684,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -505,6 +715,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -532,6 +743,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -556,6 +768,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -600,6 +813,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [