From 20a622236f1e7f1f4d84be3e54c0bc91d3a8ce49 Mon Sep 17 00:00:00 2001
From: savitamittal1 <39776179+savitamittal1@users.noreply.github.com>
Date: Fri, 29 Sep 2023 10:01:42 -0700
Subject: [PATCH] intel optimization (#2692)

---
 ...ain-hyperparameter-tune-with-sklearn.ipynb | 218 +++++++++++++++++-
 1 file changed, 216 insertions(+), 2 deletions(-)

diff --git a/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb b/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb
index 1cf1d2e594..700d9fc0fb 100644
--- a/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb
+++ b/sdk/python/jobs/single-step/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-with-sklearn.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -26,6 +27,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -57,6 +59,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -78,6 +81,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -105,6 +109,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -115,6 +120,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -176,6 +182,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -204,10 +211,11 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now, create the file in the dependencies directory."
+    "Now, create the file in the dependencies directory. You can also optionally install Intel® Extension for Scikit-Learn in your yaml file for additional performance on your Intel hardware. More details can be found at the end of this section."
    ]
   },
   {
@@ -234,6 +242,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -269,6 +278,81 @@
    ]
   },
   {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### **[Optional] Install Intel® Extension for Scikit-Learn optimizations for more performance on Intel hardware**"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Want to speed up your scikit-learn scripts on Intel hardware? Try adding [Intel® Extension for Scikit-Learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) into your conda yaml file. We will show you how to enable these optimizations later in this example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "name": "make_sklearnex_conda_file"
+   },
+   "outputs": [],
+   "source": [
+    "%%writefile {dependencies_dir}/conda.yaml\n",
+    "name: sklearn-env\n",
+    "channels:\n",
+    "  - conda-forge\n",
+    "dependencies:\n",
+    "  - python=3.8\n",
+    "  - pip=21.2.4\n",
+    "  - scikit-learn=0.24.2\n",
+    "  - scikit-learn-intelex\n",
+    "  - scipy=1.7.1\n",
+    "  - pip:  \n",
+    "    - mlflow== 1.26.1\n",
+    "    - azureml-mlflow==1.42.0\n",
+    "    - mlflow-skinny==2.3.2"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The specification contains some common packages that you'll use in your job (scikit-learn, scipy, pip), along with Intel® Extension for Scikit-Learn.\n",
+    "\n",
+    "\n",
+    "Use the *yaml* file to create and register this custom environment in your workspace:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml.entities import Environment\n",
+    "\n",
+    "custom_env_name = \"sklearn-env\"\n",
+    "\n",
+    "job_env = Environment(\n",
+    "    name=custom_env_name,\n",
+    "    description=\"Custom environment for sklearn image classification\",\n",
+    "    conda_file=os.path.join(dependencies_dir, \"conda.yaml\"),\n",
+    "    image=\"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest\",\n",
+    ")\n",
+    "job_env = ml_client.environments.create_or_update(job_env)\n",
+    "\n",
+    "print(\n",
+    "    f\"Environment with name {job_env.name} is registered to workspace, the environment version is {job_env.version}\"\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -302,10 +386,11 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now, create the script file in the source directory."
+    "Now, create the script file in the source directory. If you want to use Intel® Extension for Scikit-Learn optimizations as part of this script, take a look at the alternative script file found at the end of this section."
    ]
   },
   {
@@ -405,6 +490,128 @@
    ]
   },
   {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### **[Optional] Enable Intel® Extension for Scikit-Learn optimizations for more performance on Intel hardware**"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you have installed Intel® Extension for Scikit-Learn (as demonstrated in the previous section), you can enable the performance optimizations by adding two lines of code near the top of the script file, before any scikit-learn imports, as shown below.\n",
+    "\n",
+    "To learn more about Intel® Extension for Scikit-Learn, visit the package's [documentation](https://intel.github.io/scikit-learn-intelex/)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "name": "create_sklearnex_script_file"
+   },
+   "outputs": [],
+   "source": [
+    "%%writefile {src_dir}/train_iris.py\n",
+    "# Modified from https://www.geeksforgeeks.org/multiclass-classification-using-scikit-learn/\n",
+    "\n",
+    "import argparse\n",
+    "import os\n",
+    "\n",
+    "# Import and enable Intel Extension for Scikit-learn optimizations\n",
+    "# where possible\n",
+    "\n",
+    "from sklearnex import patch_sklearn\n",
+    "patch_sklearn()\n",
+    "\n",
+    "# importing necessary libraries\n",
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "from sklearn import datasets\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "import joblib\n",
+    "\n",
+    "import mlflow\n",
+    "import mlflow.sklearn\n",
+    "\n",
+    "def main():\n",
+    "    parser = argparse.ArgumentParser()\n",
+    "\n",
+    "    parser.add_argument('--kernel', type=str, default='linear',\n",
+    "                        help='Kernel type to be used in the algorithm')\n",
+    "    parser.add_argument('--penalty', type=float, default=1.0,\n",
+    "                        help='Penalty parameter of the error term')\n",
+    "\n",
+    "    # Start Logging\n",
+    "    mlflow.start_run()\n",
+    "\n",
+    "    # enable autologging\n",
+    "    mlflow.sklearn.autolog()\n",
+    "\n",
+    "    args = parser.parse_args()\n",
+    "    mlflow.log_param('Kernel type', str(args.kernel))\n",
+    "    mlflow.log_metric('Penalty', float(args.penalty))\n",
+    "\n",
+    "    # loading the iris dataset\n",
+    "    iris = datasets.load_iris()\n",
+    "\n",
+    "    # X -> features, y -> label\n",
+    "    X = iris.data\n",
+    "    y = iris.target\n",
+    "\n",
+    "    # dividing X, y into train and test data\n",
+    "    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n",
+    "\n",
+    "    # training a linear SVM classifier\n",
+    "    from sklearn.svm import SVC\n",
+    "    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty)\n",
+    "    svm_model_linear = svm_model_linear.fit(X_train, y_train)\n",
+    "    svm_predictions = svm_model_linear.predict(X_test)\n",
+    "\n",
+    "    # model accuracy for X_test\n",
+    "    accuracy = svm_model_linear.score(X_test, y_test)\n",
+    "    print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))\n",
+    "    mlflow.log_metric('Accuracy', float(accuracy))\n",
+    "    # creating a confusion matrix\n",
+    "    cm = confusion_matrix(y_test, svm_predictions)\n",
+    "    print(cm)\n",
+    "\n",
+    "    registered_model_name=\"sklearn-iris-flower-classify-model\"\n",
+    "\n",
+    "    ##########################\n",
+    "    #<save and register model>\n",
+    "    ##########################\n",
+    "    # Registering the model to the workspace\n",
+    "    print(\"Registering the model via MLFlow\")\n",
+    "    mlflow.sklearn.log_model(\n",
+    "        sk_model=svm_model_linear,\n",
+    "        registered_model_name=registered_model_name,\n",
+    "        artifact_path=registered_model_name\n",
+    "    )\n",
+    "\n",
+    "    # Saving the model to a file\n",
+    "    print(\"Saving the model via MLFlow\")\n",
+    "    mlflow.sklearn.save_model(\n",
+    "        sk_model=svm_model_linear,\n",
+    "        path=os.path.join(registered_model_name, \"trained_model\"),\n",
+    "    )\n",
+    "    ###########################\n",
+    "    #</save and register model>\n",
+    "    ###########################\n",
+    "    mlflow.end_run()\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    main()"
+   ]
+  },
+  {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -441,6 +648,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -463,6 +671,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -475,6 +684,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -505,6 +715,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -532,6 +743,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -556,6 +768,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -600,6 +813,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [