diff --git a/.gitignore b/.gitignore index 0bdbf412..a60d4cf3 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ .ipynb_checkpoints *scratch.ipynb *scratch.py +tmp/ # IPython profile_default/ diff --git a/.pylintrc b/.pylintrc index e1ec406e..4a16e611 100644 --- a/.pylintrc +++ b/.pylintrc @@ -421,6 +421,6 @@ valid-metaclass-classmethod-first-arg=mcs # Exceptions that will emit a warning when being caught. Defaults to # "Exception" -overgeneral-exceptions=StandardError, - Exception, - BaseException +overgeneral-exceptions=builtins.StandardError, + builtins.Exception, + builtins.BaseException diff --git a/data/nlu_evals_sample.csv b/data/nlu_evals_sample.csv new file mode 100644 index 00000000..f6d1e45e --- /dev/null +++ b/data/nlu_evals_sample.csv @@ -0,0 +1,19 @@ +flow_display_name,utterance,page_display_name,expected_intent,expected_parameters,description +Default Start Flow,I need to get my order status,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,Trying to check the status of my order,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,I hate this order status agent!,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,Wha'ts the point of ordering anything?,START_PAGE,NO_MATCH,,Demo Tests +Default Start Flow,I was looking at the order of operations yesterday but couldn't figure it out,START_PAGE,NO_MATCH,,Demo Tests +Default Start Flow,Thanks for getting my that status so quickly!,START_PAGE,head_intent.order_status,,Demo Tests +Default Start Flow,I need to make a payment,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,I'm trying to setup a new payment,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,Did you get the pavement,START_PAGE,NO_MATCH,,Demo Tests +Default Start Flow,I've been trying to pay my bill all day!,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,Why can't I get my bill paid at all online?,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Default Start Flow,Why can't I get my bill paid at all online? 
Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?,START_PAGE,head_intent.pay_arrangement,,Demo Tests +Date Collection,12/13/2022,Collect Date,-,"{'collected_date': +{ ""day"": 13, ""month"": 12, ""year"": 2022 }, 'formatted_date': '13/12/2022'}}",Demo Tests +Proper Names Demo,Patrick Marlow,names,-,"{'people_names': +{ ""original"": ""Patrick Marlow"", ""name"": ""Patrick Marlow"" }}",Demo Tests +Proper Names Demo,Andrew Smith,names,,"{'people_names': +{ ""original"": ""Andrew Smith"", ""name"": ""Andrew Smith"" }}",Demo Tests diff --git a/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb b/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb new file mode 100644 index 00000000..c9660c9a --- /dev/null +++ b/examples/nlu_analysis_series/nlu_evaluation_testing.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2023 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NLU Evaluation Testing\n", + "In this notebook, we will show you how to perform bulk NLU testing by providing a large input corpus and receiving the predicted Intent and Parameter extraction results from your agent.\n", + "\n", + "## Prerequisites\n", + "- Ensure you have a GCP Service Account key with the Dialogflow API Admin privileges assigned to it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If you haven't already, make sure you install the `dfcx-scrapi` library\n", + "\n", + "!pip install dfcx-scrapi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from dfcx_scrapi.tools.nlu_evals import NluEvals" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# (Option 1) Google Sheet as Input\n", + "The primary option for running the NLU Eval pipeline revolves around using Google Sheets as a source for your data. 
\n", + "This method provides a simple, streamlined system that can pull and push data all into a single Google Sheet.\n", + "\n", + "In order to run the full NLU evaluation test, the following inputs are needed:\n", + "- `agent_id`, The Dialogflow CX Agent ID.\n", + "- `input_google_sheet`, the Display Name of the Google Sheet.\n", + "- `input_google_sheet_tab`, the Display Name of the tab on the Google Sheet where your input data lives.\n", + "- `output_google_sheet_results`, the Display Name of the tab on the Google Sheet where you want the full output results to be written.\n", + "- `output_google_sheet_summary`, the Display Name of the tab on the Google Sheet where you want the report summary to be written.\n", + "\n", + "_**NOTE** - In order for your Service Account to access your Google Sheet (read / write) you need to share the Google Sheet with your Service Account email address._\n", + "\n", + "You can find a [Sample Google Sheet dataset](https://docs.google.com/spreadsheets/d/e/2PACX-1vREvsZAktNvRr78KjUBlZl2PVUHKJru8hRCgmuDi9kn_oDT_weFKkGmyoQwRPdj0JcxK1kNzgceAPA5/pubhtml#) here." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "agent_id = ''\n", + "creds_path = ''\n", + "\n", + "# Sample Inputs\n", + "input_google_sheet = 'Dialogflow CX SCRAPI - NLU Eval Sample Dataset'\n", + "input_google_sheet_tab = 'input_dataset'\n", + "output_google_sheet_results = 'results'\n", + "output_google_sheet_summary = 'summary'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run NLU Evals\n", + "There are 3 main stages that happen for the Eval Pipeline:\n", + "1. Process and validate the input data\n", + "2. Run the Eval Tests\n", + "3. Write the output summary and details to a report." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-11 11:40:31 INFO ---------- STARTING Evals ----------\n", + "2023-09-11 11:40:37 WARNING Text input is too long. Truncating to 256 characters.\n", + "2023-09-11 11:40:37 WARNING TRUNCATED TEXT: Why can't I get my bill paid at all online? Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all onli\n", + "2023-09-11 11:40:41 INFO Progress(0/15)[> ] 0.00%\n", + "2023-09-11 11:40:51 INFO Progress(15/15)[------------------------------------------------->] 100.00%\n", + "2023-09-11 11:40:51 INFO ---------- Evals COMPLETE ----------\n" + ] + } + ], + "source": [ + "nlu = NluEvals(agent_id, creds_path=creds_path)\n", + "\n", + "df = nlu.process_input_google_sheet(input_google_sheet, input_google_sheet_tab)\n", + "df = nlu.run_evals(df)\n", + "nlu.write_results_to_sheets(df, input_google_sheet, output_google_sheet_results, output_google_sheet_summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect Results Locally\n", + "You can also inspect and filter the results of your tests locally as needed." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
flow_display_namepage_display_nameutteranceexpected_intentexpected_parameterstarget_pagematch_typeconfidenceparameters_setdetected_intentagent_display_namedescriptioninput_source
0Default Start FlowSTART_PAGEI need to get my order statushead_intent.order_statussentiment_routerINTENT1.000000head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
1Default Start FlowSTART_PAGETrying to check the status of my orderhead_intent.order_statussentiment_routerINTENT0.947959head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
2Default Start FlowSTART_PAGEI hate this order status agent!head_intent.order_statussentiment_routerINTENT0.955709head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
3Default Start FlowSTART_PAGEWha'ts the point of ordering anything?NO_MATCHsentiment_routerINTENT0.841712head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
4Default Start FlowSTART_PAGEI was looking at the order of operations yesterday but couldn't figure it outNO_MATCHsentiment_routerINTENT0.790275head_intent.order_status[Demo] Multi Demo Extravaganza Part Deux: The RevengeDemo Testsinput_dataset
\n", + "
" + ], + "text/plain": [ + " flow_display_name page_display_name \\\n", + "0 Default Start Flow START_PAGE \n", + "1 Default Start Flow START_PAGE \n", + "2 Default Start Flow START_PAGE \n", + "3 Default Start Flow START_PAGE \n", + "4 Default Start Flow START_PAGE \n", + "\n", + " utterance \\\n", + "0 I need to get my order status \n", + "1 Trying to check the status of my order \n", + "2 I hate this order status agent! \n", + "3 Wha'ts the point of ordering anything? \n", + "4 I was looking at the order of operations yesterday but couldn't figure it out \n", + "\n", + " expected_intent expected_parameters target_page match_type \\\n", + "0 head_intent.order_status sentiment_router INTENT \n", + "1 head_intent.order_status sentiment_router INTENT \n", + "2 head_intent.order_status sentiment_router INTENT \n", + "3 NO_MATCH sentiment_router INTENT \n", + "4 NO_MATCH sentiment_router INTENT \n", + "\n", + " confidence parameters_set detected_intent \\\n", + "0 1.000000 head_intent.order_status \n", + "1 0.947959 head_intent.order_status \n", + "2 0.955709 head_intent.order_status \n", + "3 0.841712 head_intent.order_status \n", + "4 0.790275 head_intent.order_status \n", + "\n", + " agent_display_name description \\\n", + "0 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "1 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "2 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "3 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "4 [Demo] Multi Demo Extravaganza Part Deux: The Revenge Demo Tests \n", + "\n", + " input_source \n", + "0 input_dataset \n", + "1 input_dataset \n", + "2 input_dataset \n", + "3 input_dataset \n", + "4 input_dataset " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# (Option 2) CSV as Input\n", + "Similar to the above pipeline, except we will process the input data from a CSV file.\n", + "\n", + "For the output to local files, you will need to define 2 output destinations:\n", + "1. An output file for the full detailed results\n", + "2. An output file for the report summary\n", + "\n", + "- `agent_id`, The Dialogflow CX Agent ID.\n", + "- `input_path`, The local path where your input data lives\n", + "- `output_summary_path`, The local path where you want the report summary written\n", + "- `output_results_path`, The local path where you want the full results written\n", + "\n", + "You can find a [Sample CSV Dataset here.](https://github.com/GoogleCloudPlatform/dfcx-scrapi/blob/main/data/nlu_evals_sample.csv)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "agent_id = ''\n", + "\n", + "input_path = '/path/to/your/input/data.csv'\n", + "output_summary_path = '/path/to/your/output/summary.csv'\n", + "output_results_path = '/path/to/your/output/results.csv'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run NLU Evals\n", + "\n", + "There are 3 main stages that happen for the Eval Pipeline:\n", + "1. Process and validate the input data\n", + "2. Run the Eval Tests\n", + "3. Write the output summary and details to a report." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-11 11:31:51 INFO ---------- STARTING Evals ----------\n", + "2023-09-11 11:31:56 WARNING Text input is too long. Truncating to 256 characters.\n", + "2023-09-11 11:31:56 WARNING TRUNCATED TEXT: Why can't I get my bill paid at all online? Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all online?Why can't I get my bill paid at all onli\n", + "2023-09-11 11:31:58 INFO Progress(0/15)[> ] 0.00%\n", + "2023-09-11 11:32:08 INFO Progress(15/15)[------------------------------------------------->] 100.00%\n", + "2023-09-11 11:32:08 INFO ---------- Evals COMPLETE ----------\n" + ] + } + ], + "source": [ + "nlu = NluEvals(agent_id, creds_path=creds_path)\n", + "\n", + "df = nlu.process_input_csv(input_path)\n", + "df = nlu.run_evals(df)\n", + "nlu.write_summary_to_file(df, output_summary_path)\n", + "nlu.write_results_to_file(df, output_results_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "scrapi-local", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb new file mode 100644 index 00000000..21eb3d10 --- /dev/null +++ b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__colab.ipynb @@ -0,0 +1,333 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WpkyirmC-F33" + }, + "source": [ + "# Vertex AI Conversation - Evaluation Tool\n", + "\n", + "This tool requires user input in several steps. Please run the cells one by one (Shift+Enter) to ensure all the steps are successfully completed.\n", + "\n", + "## Instructions:\n", + "\n", + "1. **Set-up**\n", + " 1. First cell: install and import dependencies\n", + " 2. Second cell: authentication - it requires following the steps in the pop-up window. Alternatively, it can be replaced by another [supported authentication method](https://github.com/GoogleCloudPlatform/dfcx-scrapi#authentication)\n", + " 3. Third cell: enter values for project, location and agent in the right panel; then run the cell.\n", + " 4. Fourth cell: run examples to validate the set-up is correct\n", + "2. **Generate Questions & Answer**\n", + " 1. First cell: save a sample csv file with the correct format\n", + " 2. Second cell: upload the csv file with the fields `user_query` and `ideal_answer` for all examples\n", + " 3. Third cell: bulk generation of `agent_answer` that includes the text and link\n", + "3. **Rating**\n", + " 1. First cell: download the csv and add the ratings offline\n", + " 2. Second cell: upload the csv file with the ratings\n", + "4. **Results**\n", + " 1. 
First cell: visualize distribution of ratings\n", + "\n", + "This notebook calls `DetectIntent` using [dfcx-scrapi library](https://github.com/GoogleCloudPlatform/dfcx-scrapi) for Dialogflow CX.\n", + "\n", + "\n", + "## Rating guidance:\n", + "\n", + "For each sample (aka row), the rater should evaluate each answer (including the link) that was generated by the agent. The answer will be evaluated with an integer number (a scalar) from -1 to 3 as follows:\n", + "* **+3** : Perfect answer > fully addresses the question with correct information and polite tone\n", + "* **+2** : Good answer > may contain unnecessary info, may miss some info, or may not be perfectly articulated\n", + "* **+1** : Slightly good answer > some truth to the answer\n", + "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", + "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Afvsuux0zaWZ" + }, + "source": [ + "## Set-up\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PPJYRHN83bHg" + }, + "outputs": [], + "source": [ + "# Dependencies\n", + "!pip install dfcx-scrapi --quiet\n", + "\n", + "import io\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from dfcx_scrapi.core.sessions import Sessions\n", + "from google.auth import default\n", + "from google.colab import auth\n", + "from google.colab import files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sztyBjNlIGAw" + }, + "outputs": [], + "source": [ + "# Authentication\n", + "\n", + "auth.authenticate_user()\n", + "creds, _ = default()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mRUB0Uf-3uzS" + }, + "outputs": [], + "source": [ + "# Agent config\n", + "project_id = '' #@param{type: 'string'}\n", + "location = 'global' #@param{type: 'string'}\n", + "agent_id = '' #@param{type: 'string'}\n", + "\n", + "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", + "print(agent_id)\n", + "\n", + "s = Sessions(agent_id=agent_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OChJbblt3dt7" + }, + "outputs": [], + "source": [ + "# Test\n", + "user_query = 'Hello World!'\n", + "agent_answer = s.get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", + "\n", + "user_query = 'Which is the cheapest plan?'\n", + "agent_answer = s.get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L2WQime-8-Dw" + }, + "source": [ + "## Generate Questions & Answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q3II66B04F0j" + }, + "outputs": [], + "source": [ + "# Create sample csv\n", + "\n", + "sample_df = pd.DataFrame({\n", + " \"user_query\": [],\n", + " \"ideal_answer\": [],\n", + " \"agent_answer\": [],\n", + " \"rating\": [],\n", + " \"comment\": []\n", + "})\n", + "\n", + "sample_df.loc[0] = [\"Who are you?\", \"I am an assistant\", \"\", 0, \"\"]\n", + "sample_df.loc[1] = [\"Which is the cheapest plan?\", \"Basic plan\", \"\", 0, \"\"]\n", + "sample_df.loc[2] = [\"My device is not working\", \"Call 888-555\", \"\", 0, \"\"]\n", + "\n", + "# Export to local drive as csv file\n", + "file_name = 'data_sample.csv'\n", + 
"sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "files.download(file_name)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYr4Dy77KbfL" + }, + "outputs": [], + "source": [ + "input(f\"In your local drive, you can find the csv file '{file_name}' Add the user_query and ideal_answer per example \\nWhen done, click 'Enter'\")\n", + "print('done')\n", + "\n", + "# Import from local drive the csv file with the user_query and ideal_answer per examples\n", + "uploaded = files.upload()\n", + "file_name2 = next(iter(uploaded))\n", + "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmJcxpFI881j" + }, + "outputs": [], + "source": [ + "# Generate answers for each query\n", + "df['agent_answer'] = df.apply(lambda row: s.get_agent_answer(row[\"user_query\"]), axis=1)\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yO2x7lc2BRDR" + }, + "source": [ + "# Rating" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZfAMlQbS8qsy" + }, + "outputs": [], + "source": [ + "# Export to local drive as csv file\n", + "file_name = 'output.csv'\n", + "df.to_csv(file_name, encoding='utf-8-sig', index=False)\n", + "files.download(file_name)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SEU44Mcy9mBU" + }, + "outputs": [], + "source": [ + "input(f\"In your local drive, you can find the csv file '{file_name}' Rate each agent_answer using ideal_answer as reference. Rating from -1 to 3. 
\\nWhen done, click 'Enter'\")\n", + "print('done')\n", + "\n", + "# Import from local drive the csv file with the ratings\n", + "uploaded = files.upload()\n", + "file_name2 = next(iter(uploaded))\n", + "df = pd.read_csv(io.BytesIO(uploaded[file_name2]))\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W5j9yAewRmNO" + }, + "source": [ + "# Results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I5209MB7VS1q" + }, + "outputs": [], + "source": [ + "# Rating distribution\n", + "#df[\"rating\"].describe()\n", + "\n", + "# Histogram\n", + "ratings_set = [-1, 0, 1, 2, 3]\n", + "ratings_values = df['rating'].values\n", + "ratings_count = len(ratings_values)\n", + "\n", + "bar_centers = np.linspace(min(ratings_set), max(ratings_set), len(ratings_set))\n", + "bar_edges = np.linspace(min(ratings_set)-0.5, max(ratings_set)+0.5, len(ratings_set)+1)\n", + "bar_heights, _ = np.histogram(ratings_values, bins=bar_edges, density=True)\n", + "\n", + "for center, _h in zip(bar_centers, bar_heights):\n", + " print(f\"{center}: count={round(_h*ratings_count):.0f}, percentage={_h*100:.2f}%\")\n", + "\n", + "# Plot\n", + "height_sum = 100 # for percentage, use 100\n", + "fig, axs = plt.subplots(1, 1, figsize=(6, 4), tight_layout=True)\n", + "\n", + "plt.bar(bar_centers, height_sum*bar_heights, width=0.8)\n", + "ratings_mean = np.mean(ratings_values)\n", + "plt.plot([ratings_mean, ratings_mean], [0, height_sum], '--', label=f\"mean={ratings_mean:.2f}\", color='red')\n", + "ratings_median = np.median(ratings_values)\n", + "plt.plot([ratings_median, ratings_median], [0, height_sum], '--', label=f\"median={ratings_median:.2f}\", color='green')\n", + "\n", + "plt.axis((min(bar_edges), max(bar_edges), 0, round(1.2*max(height_sum*bar_heights), 1)))\n", + "plt.legend(loc='upper left')\n", + "plt.gca().grid(axis='y')\n", + "plt.xlabel('Rating')\n", + "plt.ylabel('Percentage [%]')\n", + "plt.title(f\"Rating distribution (count={ratings_count})\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "fig.savefig('ratings_distribution.png', dpi=fig.dpi)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYwsIZ0Ej-v9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb new file mode 100644 index 00000000..30507310 --- /dev/null +++ b/examples/vertex_ai_conversation/evaluation_tool__numeric_score__nocolab.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WpkyirmC-F33" + }, + "source": [ + "# Infobot Eval\n", + "\n", + "This tool requieres user's input in several steps. 
Please run the cells one by one (Shift+Enter) to ensure all the steps are successfully completed.\n", + "\n", + "## Instructions:\n", + "\n", + "1. **Set-up**\n", + " 1. First cell: install and import dependencies\n", + " 2. Second cell: authentication - it requires following the steps in the pop-up window. Alternatively, it can be replaced by another [supported authentication method](https://github.com/GoogleCloudPlatform/dfcx-scrapi#authentication)\n", + " 3. Third cell: enter values for project, location and agent in the right panel; then run the cell.\n", + " 4. Fourth cell: run examples to validate the set-up is correct\n", + "2. **Generate Questions & Answer**\n", + " 1. First cell: save a sample csv file with the correct format\n", + " 2. Second cell: upload the csv file with the fields `user_query` and `ideal_answer` for all examples\n", + " 3. Third cell: bulk generation of `agent_answer` that includes the text and link\n", + "3. **Rating**\n", + " 1. First cell: download the csv and add the ratings offline\n", + " 2. Second cell: upload the csv file with the ratings\n", + "4. **Results**\n", + " 1. First cell: visualize distribution of ratings\n", + "\n", + "This notebook calls `DetectIntent` using [dfcx-scrapi library](https://github.com/GoogleCloudPlatform/dfcx-scrapi) for Dialogflow CX.\n", + "\n", + "\n", + "## Rating guidance:\n", + "\n", + "For each sample (aka row), the rater should evaluate each answer (including the link) that was generated by the agent. The answer will be evaluated with an integer number (a scalar) from -1 to 3 as follows:\n", + "* **+3** : Perfect answer > fully addresses the question with correct information and polite tone\n", + "* **+2** : Good answer > may contain unnecessary info, may miss some info, or may not be perfectly articulated\n", + "* **+1** : Slightly good answer > some truth to the answer\n", + "* **0** : Neutral answer > no answer or answer contains irrelevant info\n", + "* **-1** : Hurtful answer > wrong or misleading info, or inappropriate tone\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Afvsuux0zaWZ" + }, + "source": [ + "## Set-up\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PPJYRHN83bHg" + }, + "outputs": [], + "source": [ + "# Dependencies\n", + "!pip install dfcx-scrapi --quiet\n", + "\n", + "import io\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from dfcx_scrapi.core.sessions import Sessions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ldbOTqPVnXRj" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instruction: Run the following commands one by one in the Terminal in order to authenticate the notebook\n", + "```\n", + "gcloud auth login\n", + "gcloud auth application-default login\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OegHGMr_nXRj" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instruction: In the next cell, edit the values of the Agent config, then run the cell\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mRUB0Uf-3uzS" + }, + "outputs": [], + "source": [ + "# Agent config\n", + "project_id = '' #@param{type: 'string'}\n", + "location = 'global' #@param{type: 'string'}\n", + "agent_id = '' #@param{type: 'string'}\n", + "\n", + "agent_id = f\"projects/{project_id}/locations/{location}/agents/{agent_id}\"\n", + "print(agent_id)\n", + "\n", + "s = 
Sessions(agent_id=agent_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OChJbblt3dt7" + }, + "outputs": [], + "source": [ + "# Test\n", + "user_query = 'Hello World!'\n", + "agent_answer = s.get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")\n", + "\n", + "user_query = 'Which is the cheapest plan?'\n", + "agent_answer = s.get_agent_answer(user_query)\n", + "print(f\" Q: {user_query}\\n A: {agent_answer}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L2WQime-8-Dw" + }, + "source": [ + "## Generate Questions & Answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q3II66B04F0j" + }, + "outputs": [], + "source": [ + "# Create sample csv\n", + "\n", + "sample_df = pd.DataFrame({\n", + " \"user_query\": [],\n", + " \"ideal_answer\": [],\n", + " \"agent_answer\": [],\n", + " \"rating\": [],\n", + " \"comment\": []\n", + "})\n", + "\n", + "sample_df.loc[0] = [\"Who are you?\", \"I am an assistant\", \"\", 0, \"\"]\n", + "sample_df.loc[1] = [\"Which is the cheapest plan?\", \"Basic plan\", \"\", 0, \"\"]\n", + "sample_df.loc[2] = [\"My device is not working\", \"Call 888-555\", \"\", 0, \"\"]\n", + "\n", + "# Export to local drive as csv file\n", + "file_name = 'data_sample.csv'\n", + "sample_df.to_csv(file_name, encoding='utf-8-sig', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4n-dGQLonXRm" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instructions:\n", + "\n", + "1. Download the file `data_sample.csv` to your local drive by right-clicking in the file\n", + "2. Open the csv file `data_sample.csv` and add the `user_query` and `ideal_answer` per example\n", + "3. Upload the updated file from your local drive to the Jupyter File system by clicking 'Upload File'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYr4Dy77KbfL" + }, + "outputs": [], + "source": [ + "\n", + "file_name2 = file_name\n", + "df = pd.read_csv(file_name2)\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmJcxpFI881j" + }, + "outputs": [], + "source": [ + "# Generate answers for each query\n", + "df['agent_answer'] = df.apply(lambda row: s.get_agent_answer(row[\"user_query\"]), axis=1)\n", + "\n", + "# Export to local drive as csv file\n", + "file_name3 = file_name2\n", + "df.to_csv(file_name3, encoding='utf-8-sig', index=False)\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yO2x7lc2BRDR" + }, + "source": [ + "# Rating" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uB0txK4QnXRn" + }, + "source": [ + "**ATTENTION: MANUAL STEP**\n", + "\n", + "Instructions:\n", + "\n", + "1. Download the file `data_sample.csv` to your local drive by right-clicking in the file\n", + "2. Open the csv file `data_sample.csv` and add the `rating` and `comment` (optionally) per example\n", + "3. 
Upload the updated file from your local drive to the Jupyter File system by clicking 'Upload File'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SEU44Mcy9mBU" + }, + "outputs": [], + "source": [ + "\n", + "df = pd.read_csv(file_name3)\n", + "\n", + "assert df.shape[0] > 0, \"The csv has zero rows\"\n", + "assert set(df.columns) == set(sample_df.columns), f\"The csv must have the following columns: {sample_df.columns.values}\"\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W5j9yAewRmNO" + }, + "source": [ + "# Results\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I5209MB7VS1q" + }, + "outputs": [], + "source": [ + "# Rating distribution\n", + "#df[\"rating\"].describe()\n", + "\n", + "# Histogram\n", + "ratings_set = [-1, 0, 1, 2, 3]\n", + "ratings_values = df['rating'].values\n", + "ratings_count = len(ratings_values)\n", + "\n", + "bar_centers = np.linspace(min(ratings_set), max(ratings_set), len(ratings_set))\n", + "bar_edges = np.linspace(min(ratings_set)-0.5, max(ratings_set)+0.5, len(ratings_set)+1)\n", + "bar_heights, _ = np.histogram(ratings_values, bins=bar_edges, density=True)\n", + "\n", + "for center, _h in zip(bar_centers, bar_heights):\n", + " print(f\"{center}: count={round(_h*ratings_count):.0f}, percentage={_h*100:.2f}%\")\n", + "\n", + "# Plot\n", + "height_sum = 100 # for percentage, use 100\n", + "fig, axs = plt.subplots(1, 1, figsize=(6, 4), tight_layout=True)\n", + "\n", + "plt.bar(bar_centers, height_sum*bar_heights, width=0.8)\n", + "ratings_mean = np.mean(ratings_values)\n", + "plt.plot([ratings_mean, ratings_mean], [0, height_sum], '--', label=f\"mean={ratings_mean:.2f}\", color='red')\n", + "ratings_median = np.median(ratings_values)\n", + "plt.plot([ratings_median, ratings_median], [0, height_sum], '--', label=f\"median={ratings_median:.2f}\", color='green')\n", + "\n", + "plt.axis((min(bar_edges), max(bar_edges), 0, round(1.2*max(height_sum*bar_heights), 1)))\n", + "plt.legend(loc='upper left')\n", + "plt.gca().grid(axis='y')\n", + "plt.xlabel('Rating')\n", + "plt.ylabel('Percentage [%]')\n", + "plt.title(f\"Rating distribution (count={ratings_count})\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYwsIZ0Ej-v9" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "environment": { + "kernel": "python3", + "name": "tf2-cpu.2-11.m112", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-cpu.2-11:m112" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/requirements.txt b/requirements.txt index a2d91b6d..f8e90c5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,10 @@ google-cloud-dialogflow-cx google-auth google-oauth oauth2client +pyparsing==2.4.7 pandas tabulate -gspread +gspread==5.10.0 gspread_dataframe numpy requests @@ -13,7 +14,7 @@ pylint==2.8.3 pytest==6.0.2 pytest-cov==2.11.1 pytest-xdist==2.1.0 -pyyaml==5.3.1 +pyyaml==5.4 torch transformers sentencepiece diff --git a/setup.py b/setup.py index 
ceea9628..ae21a6a1 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='dfcx-scrapi', - version='1.6.0', + version='1.9.0', description='A high level scripting API for bot builders, developers, and\ maintainers.', long_description=long_description, diff --git a/src/dfcx_scrapi/agent_extract/__init__.py b/src/dfcx_scrapi/agent_extract/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/dfcx_scrapi/agent_extract/agents.py b/src/dfcx_scrapi/agent_extract/agents.py new file mode 100644 index 00000000..0bcf8879 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/agents.py @@ -0,0 +1,146 @@ +"""Agent processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import time +import os +import shutil +from typing import Dict + +from dfcx_scrapi.core import agents +from dfcx_scrapi.core import operations +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.agent_extract import graph +from dfcx_scrapi.agent_extract import flows +from dfcx_scrapi.agent_extract import intents +from dfcx_scrapi.agent_extract import entity_types +from dfcx_scrapi.agent_extract import test_cases +from dfcx_scrapi.agent_extract import webhooks +from dfcx_scrapi.agent_extract import gcs_utils +from dfcx_scrapi.agent_extract import types + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + +class Agents(scrapi_base.ScrapiBase): + """Agent Metadata methods and functions.""" + def __init__( + self, + agent_id: str, + lang_code: str = "en", + creds_path: str = None, + creds_dict: Dict = None, + creds=None, + scope=False + ): + super().__init__( + creds_path=creds_path, + creds_dict=creds_dict, + creds=creds, + scope=scope, + ) + self.agent_id = agent_id + self.lang_code = lang_code + self._core_agents = agents.Agents(creds=creds) + self.gcs = gcs_utils.GcsUtils() + self.flows = flows.Flows() + self.intents = intents.Intents() + self.etypes = entity_types.EntityTypes() + self.webhooks = webhooks.Webhooks() + self.tcs = test_cases.TestCases() + self.ops = operations.Operations() + + @staticmethod + def prep_local_dir(agent_local_path: str): + """Prepare the local directory for agent zip file.""" + if os.path.isdir(agent_local_path): + logging.info("Cleaning up old directory...") + shutil.rmtree(agent_local_path) + logging.info(f"Making temp directory: {agent_local_path}") + os.mkdir(agent_local_path) + else: + os.mkdir(agent_local_path) + + def await_lro(self, lro: str): + """Wait for long running operation to complete.""" + try: + i = 0 + while not self.ops.get_lro(lro).done: + time.sleep(1) + i += 1 + if i == 20: + break + + except UserWarning: + print("LRO Failed.") + + return True + + def export_agent(self, agent_id: str, gcs_bucket_uri: str, + environment_display_name: str = None): + """Handle the agent export, LRO and logging.""" + export_start = time.time() + logging.info("Exporting agent...") + lro = 
self._core_agents.export_agent( + agent_id=agent_id,gcs_bucket_uri=gcs_bucket_uri, data_format="JSON", + environment_display_name=environment_display_name) + + + self.await_lro(lro) + logging.info("Export Complete.") + logging.debug(f"EXPORT: {time.time() - export_start}") + + def download_and_extract(self, agent_local_path: str, gcs_bucket_uri: str): + """Handle download from GCS and extracting ZIP file.""" + if not os.path.exists(agent_local_path): + os.makedirs(agent_local_path) + + download_start = time.time() + logging.info("Downloading agent file from GCS Bucket...") + agent_file = self.gcs.download_gcs( + gcs_path=gcs_bucket_uri, local_path=agent_local_path) + logging.info("Download complete.") + logging.debug(f"DOWNLOAD: {time.time() - download_start}") + + self.gcs.unzip(agent_file, agent_local_path) + + + def process_agent(self, agent_id: str, gcs_bucket_uri: str, + environment_display_name: str = None): + """Process the specified Agent for offline data gathering.""" + agent_local_path = "/tmp/agent" + self.prep_local_dir(agent_local_path) + self.export_agent(agent_id, gcs_bucket_uri, environment_display_name) + self.download_and_extract(agent_local_path, gcs_bucket_uri) + + logging.info("Processing Agent...") + data = types.AgentData() + data.graph = graph.Graph() + data.lang_code = self.lang_code + data.agent_id = agent_id + data = self.flows.process_flows_directory(agent_local_path, data) + data = self.intents.process_intents_directory(agent_local_path, data) + data = self.etypes.process_entity_types_directory( + agent_local_path, data) + data = self.webhooks.process_webhooks_directory(agent_local_path, data) + data = self.tcs.process_test_cases_directory(agent_local_path, data) + logging.info("Processing Complete.") + + return data diff --git a/src/dfcx_scrapi/agent_extract/common.py b/src/dfcx_scrapi/agent_extract/common.py new file mode 100644 index 00000000..26b86ff3 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/common.py @@ -0,0 +1,80 @@ +"""Common methods and helper functions used throughout library.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import re +from dfcx_scrapi.agent_extract import types + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(message)s", +) + +class Common: + """Common methods and helper functions used throughout library.""" + + @staticmethod + def parse_filepath(in_path: str, resource_type: str) -> str: + """Parse file path to provide quick reference for resource.""" + + regex_map = { + "flow": r".*\/flows\/([^\/]*)", + "page": r".*\/pages\/([^\/]*)\.", + "entity_type": r".*\/entityTypes\/([^\/]*)", + "intent": r".*\/intents\/([^\/]*)", + "route_group": r".*\/transitionRouteGroups\/([^\/]*)", + "webhook": r".*\/webhooks\/([^\/]*)\." 
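+            # Descriptive note: each pattern's capture group pulls the resource's file or directory name out of its local export path.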
+ } + resource_name = re.match(regex_map[resource_type], in_path).groups()[0] + + return resource_name + + @staticmethod + def clean_display_name(display_name: str): + """Replace special characters from map for the given display name.""" + patterns = { + "%22": '"', + "%23": "#", + "%24": "$", + "%26": "&", + "%27": "'", + "%28": "(", + "%29": ")", + "%2b": "+", + "%2c": ",", + "%2f": "/", + "%3a": ":", + "%3c": "<", + "%3d": "=", + "%3e": ">", + "%3f": "?", + "%5b": "[", + "%5d": "]", + "%e2%80%9c": "“", + "%e2%80%9d": "”", + } + + for key, value in patterns.items(): + if key in display_name: + display_name = display_name.replace(key, value) + + return display_name + + @staticmethod + def check_lang_code(lang_code: str, stats: types.AgentData): + """Check to see if file lang_code matches user input lang_code.""" + return stats.lang_code == lang_code diff --git a/src/dfcx_scrapi/agent_extract/entity_types.py b/src/dfcx_scrapi/agent_extract/entity_types.py new file mode 100644 index 00000000..17469b58 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/entity_types.py @@ -0,0 +1,168 @@ +"""Entity Type processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os + +from typing import Dict + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types + +class EntityTypes: + """Entity Type processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + + @staticmethod + def build_entity_type_path_list(agent_local_path: str): + """Builds a list of dirs, each representing an Entity Type directory. + + Ex: /path/to/agent/entityTypes/ + + This dir path can then be used to find the next level of information + in the directory by appending the appropriate next dir structures like: + - .json, for the Entity Type object + - /entities, for the Entities dir + """ + root_dir = agent_local_path + "/entityTypes" + + entity_type_paths = [] + + for entity_type_dir in os.listdir(root_dir): + entity_type_dir_path = f"{root_dir}/{entity_type_dir}" + entity_type_paths.append(entity_type_dir_path) + + return entity_type_paths + + @staticmethod + def build_lang_code_paths(etype: types.EntityType): + """Builds dict of lang codes and file locations. + + The language_codes and paths for each file are stored in a dictionary + inside of the Entity Type dataclass. This dict is accessed later to + lint each file and provide reporting based on each language code. 
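+        Ex (illustrative): etype.entities["en"] = {"file_path": ".../entityTypes/My Entity Type/entities/en.json"}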
+ """ + root_dir = etype.dir_path + "/entities" + + for lang_file in os.listdir(root_dir): + lang_code = lang_file.split(".")[0] + lang_code_path = f"{root_dir}/{lang_file}" + etype.entities[lang_code] = {"file_path": lang_code_path} + + @staticmethod + def build_excluded_phrases_path(etype: types.EntityType, lang_code: str): + """Builds a dict of excluded phrases and file locations.""" + root_dir = etype.dir_path + "/excludedPhrases" + lang_code_path = f"{root_dir}/{lang_code}.json" + + return lang_code_path + + @staticmethod + def process_entity_type_metadata(etype: types.EntityType): + """Extract metadata for Entity Type for later processing.""" + metadata_file = etype.dir_path + f"/{etype.display_name}.json" + + with open(metadata_file, "r", encoding="UTF-8") as etype_file: + etype.data = json.load(etype_file) + etype.resource_id = etype.data.get("name", None) + etype.kind = etype.data.get("kind", None) + etype.auto_expansion = etype.data.get("autoExpansionMode", None) + etype.fuzzy_extraction = etype.data.get( + "enableFuzzyExtraction", False) + + etype_file.close() + + def process_excluded_phrases_language_codes( + self, data: Dict[str, str], lang_code_path: str): + """Process all ecluded phrases lang_code files.""" + with open(lang_code_path, "r", encoding="UTF-8") as ent_file: + new_data = json.load(ent_file) + data["excluded_phrases"] = new_data.get("excludedPhrases", None) + + return data + + def process_excluded_phrases(self, etype: types.EntityType, lang_code: str, + data: Dict[str, str]): + """Process the excluded phrases if they exist.""" + if "excludedPhrases" in os.listdir(etype.dir_path): + lang_code_path = self.build_excluded_phrases_path(etype, lang_code) + data = self.process_excluded_phrases_language_codes( + data, lang_code_path) + + return data + + def process_language_codes( + self, etype: types.EntityType, stats: types.AgentData): + """Process all Entity Type lang_code files.""" + for lang_code in etype.entities: + ent_file_path = etype.entities[lang_code]["file_path"] + + if not self.common.check_lang_code(lang_code, stats): + continue + + with open(ent_file_path, "r", encoding="UTF-8") as ent_file: + data = json.load(ent_file) + data["name"] = f"{stats.agent_id}/entityTypes/"\ + f"{etype.resource_id}" + data["display_name"] = etype.display_name + data["kind"] = etype.kind + data["entities"] = data.get("entities", None) + data = self.process_excluded_phrases(etype, lang_code, data) + stats.entity_types.append(data) + + ent_file.close() + + return stats + + def process_entities(self, etype: types.EntityType, stats: types.AgentData): + """Process the Entity files inside of an Entity Type.""" + if "entities" in os.listdir(etype.dir_path): + self.build_lang_code_paths(etype) + stats = self.process_language_codes(etype, stats) + + return stats + + def process_entity_type( + self, etype: types.EntityType, stats: types.AgentData): + """Process a Single Entity Type dir and all subdirectories.""" + + etype.display_name = self.common.parse_filepath( + etype.dir_path, "entity_type") + etype.display_name = self.common.clean_display_name(etype.display_name) + + self.process_entity_type_metadata(etype) + stats = self.process_entities(etype, stats) + stats.total_entity_types += 1 + + return stats + + def process_entity_types_directory( + self, agent_local_path: str, stats: types.AgentData): + """Processing the Entity Types dir in the JSON Package structure.""" + # Create a list of all Entity Type paths to iter through + entity_type_paths = 
self.build_entity_type_path_list(agent_local_path) + + for entity_type_path in entity_type_paths: + etype = types.EntityType() + etype.dir_path = entity_type_path + + stats = self.process_entity_type(etype, stats) + full_etype_id = f"{stats.agent_id}/entityTypes/{etype.resource_id}" + stats.entity_types_map[etype.display_name] = full_etype_id + + return stats diff --git a/src/dfcx_scrapi/agent_extract/flows.py b/src/dfcx_scrapi/agent_extract/flows.py new file mode 100644 index 00000000..6d991dac --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/flows.py @@ -0,0 +1,301 @@ +"""Flow extract methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os + +from typing import List + +from dfcx_scrapi.agent_extract import graph +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types +from dfcx_scrapi.agent_extract import pages +from dfcx_scrapi.agent_extract import routes +from dfcx_scrapi.agent_extract import route_groups + + +class Flows: + """Flow processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + self.pages = pages.Pages() + self.rgs = route_groups.RouteGroups() + self.routes = routes.Fulfillments() + self.special_pages = [ + "End Session", + "End Flow", + "Start Page", + "Current Page", + "Previous Page", + ] + + @staticmethod + def build_flow_path_list(agent_local_path: str): + """Builds a list of dirs, each representing a Flow directory. + + Ex: /path/to/agent/flows/ + + This dir path can then be used to find the next level of information + in the directory by appending the appropriate next dir structures like: + - .json, for the Flow object + - /transitionRouteGroups, for the Route Groups dir + - /pages, for the Pages dir + """ + root_dir = agent_local_path + "/flows" + + flow_paths = [] + + for flow_dir in os.listdir(root_dir): + flow_dir_path = f"{root_dir}/{flow_dir}" + flow_paths.append(flow_dir_path) + + return flow_paths + + @staticmethod + def remove_flow_pages_from_set(input_set: set) -> set: + """Remove any transitions tagged with FLOW. + + Some route transitions go to Flow instead of Page. For these + transitions, we tag them with `FLOW` for easier identification later. + However, when reporting on Graph inconsistencies like Dangling or + Unreachable pages, we want to remove these from any result sets as they + are not relevant. + """ + filtered_set = set() + + for page in input_set: + if "FLOW" not in page: + filtered_set.add(page) + + return filtered_set + + def find_unreachable_pages(self, flow: types.Flow): + """Find Unreachable Pages in the graph. + + An Unreachable Page is defined as: + - A Page which has no incoming edge when traversed from Start Page. + That is, it is unreachable in the graph by any practical means. + - A Page which is connected to a root unreachable page. That is, a + page that could have both incoming or outgoing routes, but due to + its connectedness to the root orphan page, is unreachable in the + graph. 
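+          Ex (illustrative): if Page B can only be entered from Page A, and Page A has no incoming route from the Start Page, both pages are reported as unreachable.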
+ + Here we will compute the symmetric difference of 2 sets: + - Active Pages (i.e. Pages that were reachable in the graph) + - Used Pages (i.e. Pages that were used by some Route) + + If an Unreachable Page has children that it routes to, those children + will appear in Used Pages, although they will ultimately be + unreachable. It's possible for an Unreachable Page to route back to an + Active Page in the graph. For these instances, we don't want to count + those pages as unreachable, because they are reachable via other + sections of the graph. + """ + filtered_set = flow.active_pages.symmetric_difference( + flow.graph.used_nodes + ) + filtered_set = self.remove_flow_pages_from_set(filtered_set) + flow.unreachable_pages.update(filtered_set) + + return flow + + def find_unused_pages(self, flow: types.Flow): + """Find Unused Pages in the graph. + + An Unused Page is defined as: + - A Page which has no incoming or outgoing edge AND + - A Page which exists in the Agent design time, but which is not + present anywhere in the graph, either visible or non-visible. + + Here we will compute the difference of 2 sets: + - All Pages (i.e. Pages that exist in the Agent Design Time) + - Used Pages (i.e. Pages that were used by some Route) + + The resulting set will consist of 2 types of Pages: + - Truly Unused Pages + - Unreachable Root Pages + + Unreachable Root Pages end up in the results due to the fact that no + other Active Page is pointing to them. We remove these from the + resulting set before presenting the Truly Unused Pages. + """ + + # Discard special pages as they are non-relevant for final outcome + for page in self.special_pages: + flow.all_pages.discard(page) + + prelim_unused = flow.all_pages.difference(flow.graph.used_nodes) + + # Filter out Unreachable Root Pages + filtered_set = set() + + for page in prelim_unused: + if page not in flow.graph.edges: + filtered_set.add(page) + else: + flow.unreachable_pages.add(page) + + flow.unused_pages = filtered_set + + return flow + + def recurse_edges( + self, edges: List, page: types.Page, dangling: set, visited: set + ): + """Recursive method searching graph edges for Active / Dangling Pages. + + A byproduct of searching for Dangling Pages in the graph is that we can + produce a set of Active Pages in the graph. These are pages that are + reachable when traversing from the Start Page. These can then be used + to determine Unreachable Pages in another method. + """ + # For Flow Start Pages, we prepend the Flow name for later + # identification. For this section, we'll need to strip it off to + # compare with the other sets. + if page in edges: + for inner_page in edges[page]: + if inner_page not in visited: + visited.add(inner_page) + dangling, visited = self.recurse_edges( + edges, inner_page, dangling, visited + ) + + else: + dangling.add(page) + + return dangling, visited + + def find_dangling_pages(self, flow: types.Flow): + """Find Dangling Pages in the graph. + + Dangling Page is defined as: + - Any page that exists in the graph that has no outgoing edge + Active Page is defined as: + - Any page that is reachable via an active route in the graph and can + be traced back to the Start Page. + + These pages can result in a conversational "dead end" which is + potentially unrecoverable. + A byproduct of searching for the dangling pages is locating all of the + "active" pages. These are the pages that are "visited" as we traverse + the graph. We'll also return Active Pages in this method since they + will be used for downstream tasks. 
+ """ + + flow.dangling_pages, flow.active_pages = self.recurse_edges( + flow.graph.edges, + f"{flow.display_name}: Start Page", + flow.dangling_pages, + flow.active_pages, + ) + + # Clean up Special Pages + for page in self.special_pages: + flow.dangling_pages.discard(page) + + flow.dangling_pages = self.remove_flow_pages_from_set( + flow.dangling_pages + ) + + return flow + + def process_start_page(self, flow: types.Flow, stats: types.AgentData): + """Process a single Flow Path file.""" + with open(flow.start_page_file, "r", encoding="UTF-8") as flow_file: + page = types.Page(flow=flow) + page.display_name = f"{flow.display_name}: Start Page" + + # We keep track of an instance specific Flow graph for the current + # Flow, and then a main Graph for the entire agent. + flow.graph.add_node(page.display_name) + stats.graph.add_node(page.display_name) + + page.data = json.load(flow_file) + page.events = page.data.get("eventHandlers", None) + page.routes = page.data.get("transitionRoutes", None) + page.route_groups = page.data.get("transitionRouteGroups", None) + stats.flows.append(page.data) + + flow.resource_id = page.data.get("name", None) + + # Order of processing is important + stats = self.routes.process_routes(page, stats) + stats = self.routes.process_events(page, stats) + + if page.route_groups: + page, stats = self.routes.set_route_group_targets(page, stats) + + flow_file.close() + + full_flow_id = f"{stats.agent_id}/flows/{flow.resource_id}" + stats.flows_map[flow.display_name] = full_flow_id + stats.flow_page_map[flow.display_name] = { + "id": full_flow_id, + "pages": {} + } + + return stats + + def process_flow(self, flow: types.Flow, stats: types.AgentData): + """Process a Single Flow dir and all subdirectories.""" + flow.file_name = self.common.parse_filepath(flow.dir_path, "flow") + flow.display_name = self.common.clean_display_name(flow.file_name) + + flow.start_page_file = f"{flow.dir_path}/{flow.file_name}.json" + + stats.pages[flow.display_name] = [] + stats.active_intents[flow.display_name] = [] + stats = self.process_start_page(flow, stats) + stats = self.pages.process_pages_directory(flow, stats) + stats = self.rgs.process_route_groups_directory(flow, stats) + + # Order of Find Operations is important here! + flow = self.find_unused_pages(flow) + flow = self.find_dangling_pages(flow) + flow = self.find_unreachable_pages(flow) + + stats.active_pages[flow.display_name] = flow.active_pages + stats.unused_pages[flow.display_name] = flow.unused_pages + stats.unreachable_pages[flow.display_name] = flow.unreachable_pages + + return stats + + def process_flows_directory( + self, agent_local_path: str, stats: types.AgentData): + """Process the top level Flows dir in the JSON Package structure. + + The following files/dirs exist under the `flows` dir: + - Flow object (i.e. Flow START_PAGE) + - transitionRouteGroups + - pages + + In Dialogflow CX, the START_PAGE of each Flow is a special kind of Page + that exists within the Flow object itself. In this method, we will lint + the Flow object, all files in the transitionRouteGroups dir and all + files in the pages dir. 
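+
+        Illustrative layout (hypothetical Flow named "My Flow"):
+
+            flows/
+                My Flow/
+                    My Flow.json            <- Flow object (START_PAGE)
+                    pages/
+                        Some Page.json
+                    transitionRouteGroups/
+                        Some Route Group.json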
+ """ + # Create a list of all Flow paths to iter through + flow_paths = self.build_flow_path_list(agent_local_path) + stats.total_flows = len(flow_paths) + + for flow_path in flow_paths: + flow = types.Flow() + flow.graph = graph.Graph() + flow.dir_path = flow_path + stats = self.process_flow(flow, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/gcs_utils.py b/src/dfcx_scrapi/agent_extract/gcs_utils.py new file mode 100644 index 00000000..c3daff89 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/gcs_utils.py @@ -0,0 +1,68 @@ +"""Utils for Cloud Storage and local file manipulation.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import zipfile +from google.cloud import storage +from google.oauth2 import service_account + + +class GcsUtils: + """Utils for Cloud Storage and local file manipulation.""" + + def __init__(self, creds_path: str = None, project_id: str = None): + if creds_path and project_id: + self.creds = service_account.Credentials.from_service_account_file( + creds_path + ) + self.gcs_client = storage.Client( + credentials=self.creds, project=project_id + ) + + else: + self.gcs_client = storage.Client() + + @staticmethod + def unzip(agent_zip_file_path: str, extract_path: str): + """Unzip file locally.""" + with zipfile.ZipFile(agent_zip_file_path, "r") as zip_ref: + zip_ref.extractall(extract_path) + + @staticmethod + def check_for_gcs_file(file_path: str) -> bool: + """Validates GCS path vs. local path.""" + is_gcs_file = False + + file_prefix = file_path.split("/")[0] + if file_prefix == "gs:": + is_gcs_file = True + + return is_gcs_file + + def download_gcs(self, gcs_path: str, local_path: str = None): + """Downloads the specified GCS file to local machine.""" + path = gcs_path.split("//")[1] + bucket = path.split("/", 1)[0] + gcs_object = path.split("/", 1)[1] + file_name = gcs_object.split("/")[-1] + bucket = self.gcs_client.bucket(bucket) + blob = storage.Blob(gcs_object, bucket) + + if local_path: + file_name = local_path + "/" + file_name + + blob.download_to_filename(file_name) + + return file_name diff --git a/src/dfcx_scrapi/agent_extract/graph.py b/src/dfcx_scrapi/agent_extract/graph.py new file mode 100644 index 00000000..b94217ce --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/graph.py @@ -0,0 +1,45 @@ +"""Utility class for managing graph structure.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
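+
+# A minimal, illustrative sketch of how this helper is used by the extract
+# modules (flows.py and routes.py build one Graph per Flow, plus one for the
+# whole agent); the page names here are hypothetical:
+#
+#   g = Graph()
+#   g.add_node("My Flow: Start Page")
+#   g.add_node("Collect Info")
+#   g.add_edge("My Flow: Start Page", "Collect Info")  # a transition route
+#   g.add_used_node("Collect Info")                    # target of a route
+#
+# flows.Flows later walks g.edges and g.used_nodes to report dangling,
+# unused and unreachable pages.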
+
+import collections
+
+class Graph:
+    """Utility class for managing graph structure."""
+
+    def __init__(self):
+        self.nodes = set()
+        self.edges = collections.defaultdict(list)
+        self.used_nodes = set()
+
+    def add_node(self, node):
+        """Add node to set of all nodes, regardless of use in graph."""
+        self.nodes.add(node)
+
+    def add_edge(self, node1, node2):
+        self.edges[node1].append(node2)
+
+    def add_used_node(self, node):
+        """Add node to set of active in use nodes for the graph."""
+        self.used_nodes.add(node)
+
+    def remove_node(self, node):
+        self.nodes.remove(node)
+
+    def remove_edge(self, node1, node2):
+        self.edges[node1].remove(node2)
+
+    def __str__(self):
+        return f"Graph({self.nodes}, {self.edges})"
diff --git a/src/dfcx_scrapi/agent_extract/intents.py b/src/dfcx_scrapi/agent_extract/intents.py
new file mode 100644
index 00000000..e8d46dca
--- /dev/null
+++ b/src/dfcx_scrapi/agent_extract/intents.py
@@ -0,0 +1,166 @@
+"""Intent processing methods and functions."""
+
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+from dfcx_scrapi.agent_extract import common
+from dfcx_scrapi.agent_extract import types
+
+
+class Intents:
+    """Intent processing methods and functions."""
+
+    def __init__(self):
+        self.common = common.Common()
+
+    @staticmethod
+    def parse_lang_code(lang_code_path: str) -> str:
+        """Extract the language_code from the given file path."""
+
+        first_parse = lang_code_path.split("/")[-1]
+        lang_code = first_parse.split(".")[0]
+
+        return lang_code
+
+    @staticmethod
+    def build_lang_code_paths(intent: types.Intent):
+        """Builds dict of lang codes and file locations.
+
+        The language_codes and paths for each file are stored in a dictionary
+        inside of the Intent dataclass. This dict is accessed later to process
+        each file and provide reporting based on each language code.
+        """
+        root_dir = intent.dir_path + "/trainingPhrases"
+
+        for lang_file in os.listdir(root_dir):
+            lang_code = lang_file.split(".")[0]
+            lang_code_path = f"{root_dir}/{lang_file}"
+            intent.training_phrases[lang_code] = {"file_path": lang_code_path}
+
+    @staticmethod
+    def build_intent_path_list(agent_local_path: str):
+        """Builds a list of dirs, each representing an Intent directory.
+ + Ex: /path/to/agent/intents/ + + This dir path can be used to find the next level of information + in the directory by appending the appropriate next dir structures like: + - .json, for the Intent object metadata + - /trainingPhrases, for the Training Phrases dir + """ + root_dir = agent_local_path + "/intents" + + intent_paths = [] + + for intent_dir in os.listdir(root_dir): + intent_dir_path = f"{root_dir}/{intent_dir}" + intent_paths.append(intent_dir_path) + + return intent_paths + + def process_intent_metadata( + self, intent: types.Intent): + """Process the metadata file for a single Intent.""" + intent.metadata_file = f"{intent.dir_path}/{intent.display_name}.json" + + try: + with open(intent.metadata_file, "r", encoding="UTF-8") as meta_file: + intent.data = json.load(meta_file) + intent.resource_id = intent.data.get("name", None) + intent.labels = intent.data.get("labels", None) + intent.description = intent.data.get("description", None) + intent.parameters = intent.data.get("parameters", None) + + meta_file.close() + + except FileNotFoundError: + pass + + def process_language_codes( + self, intent: types.Intent, stats: types.AgentData): + """Process all training phrase lang_code files.""" + + for lang_code in intent.training_phrases: + tp_file = intent.training_phrases[lang_code]["file_path"] + + if not self.common.check_lang_code(lang_code, stats): + continue + + with open(tp_file, "r", encoding="UTF-8") as tps: + data = json.load(tps) + data["name"] = f"{stats.agent_id}/intents/{intent.resource_id}" + data["display_name"] = intent.display_name + data["labels"] = intent.labels + data["description"] = intent.description + data["parameters"] = intent.parameters + stats.intents.append(data) + stats.total_training_phrases += len(data["trainingPhrases"]) + + tps.close() + + return stats + + def process_training_phrases( + self, intent: types.Intent, stats: types.AgentData): + """Process the Training Phrase dir for a single Intent.""" + if "trainingPhrases" in os.listdir(intent.dir_path): + self.build_lang_code_paths(intent) + stats = self.process_language_codes(intent, stats) + + return stats + + def process_intent(self, intent: types.Intent, stats: types.AgentData): + """Process a single Intent directory and associated files.""" + intent.display_name = self.common.parse_filepath( + intent.dir_path, "intent") + intent.display_name = self.common.clean_display_name( + intent.display_name) + + self.process_intent_metadata(intent) + stats = self.process_training_phrases(intent, stats) + stats.total_intents += 1 + + return stats + + def process_intents_directory( + self, agent_local_path: str, stats: types.AgentData): + """Processing the top level Intents Dir in the JSON Package structure. + + The following files/dirs exist under the `intents` dir: + - Directory + - trainingPhrases + - .json + - Object + + In Dialogflow CX, the Training Phrases of each Intent are stored in + individual .json files by language code under each Intent Display + Name. In this method, we will process all Intent dirs, including the + training phrase files and metadata objects for each Intent. 
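+
+        Illustrative layout (hypothetical Intent named "my_intent"):
+
+            intents/
+                my_intent/
+                    my_intent.json          <- Intent metadata
+                    trainingPhrases/
+                        en.json             <- one file per language code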
+ """ + # Create a list of all Intent paths to iter through + intent_paths = self.build_intent_path_list(agent_local_path) + stats.intents = [] + + for intent_path in intent_paths: + intent = types.Intent() + intent.dir_path = intent_path + + stats = self.process_intent(intent, stats) + full_intent_id = f"{stats.agent_id}/intents/{intent.resource_id}" + stats.intents_map[intent.display_name] = full_intent_id + + return stats diff --git a/src/dfcx_scrapi/agent_extract/pages.py b/src/dfcx_scrapi/agent_extract/pages.py new file mode 100644 index 00000000..51a710c6 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/pages.py @@ -0,0 +1,138 @@ +"""Pages processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os + +from typing import Dict, Any + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types +from dfcx_scrapi.agent_extract import routes + + +class Pages: + """Pages processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + self.routes = routes.Fulfillments() + + @staticmethod + def build_page_path_list(flow_path: str): + """Builds a list of files, each representing a Page. + + Ex: /path/to/agent/flows//pages/.json + """ + pages_path = f"{flow_path}/pages" + + page_paths = [] + + for page in os.listdir(pages_path): + page_file_path = f"{pages_path}/{page}" + page_paths.append(page_file_path) + + return page_paths + + @staticmethod + def get_form_parameter_data(param: Dict[str, Any], page: types.Page): + fp = types.FormParameter(page=page) + fp.display_name = param.get("displayName", None) + fp.entity_type = param.get("entityType", None) + fp.required = param.get("required", None) + + fp.fill_behavior = param.get("fillBehavior", None) + + if fp.fill_behavior: + fp.init_fulfillment = fp.fill_behavior.get( + "initialPromptFulfillment", None) + fp.reprompt_handlers = fp.fill_behavior.get( + "repromptEventHandlers", None) + + fp.advanced_settings = page.form.get("advancedSettings", None) + + if fp.advanced_settings: + fp.dtmf_settings = fp.advanced_settings.get("dtmfSettings", None) + + return fp + + def process_form(self, page: types.Page, stats: types.AgentData): + """Process the Form and sub-resources within it for the Page.""" + parameters = page.form.get("parameters", None) + + if parameters: + for param in parameters: + fp = self.get_form_parameter_data(param, page) + stats = self.routes.process_reprompt_handlers(fp, stats) + + return stats + + + def process_page(self, page: types.Page, stats: types.AgentData): + """Process a Single Page file.""" + page.display_name = self.common.parse_filepath(page.page_file, "page") + page.display_name = self.common.clean_display_name(page.display_name) + + stats.graph.add_node(page.display_name) + page.flow.graph.add_node(page.display_name) + + page.flow.all_pages.add(page.display_name) + + with open(page.page_file, "r", encoding="UTF-8") as page_file: + page.data = json.load(page_file) + page.entry = 
page.data.get("entryFulfillment", None) + page.events = page.data.get("eventHandlers", None) + page.form = page.data.get("form", None) + page.routes = page.data.get("transitionRoutes", None) + page.route_groups = page.data.get("transitionRouteGroups", None) + page.resource_id = page.data.get("name", None) + + # Order of linting is important here + stats = self.routes.process_entry(page, stats) + stats = self.routes.process_routes(page, stats) + stats = self.routes.process_events(page, stats) + stats = self.process_form(page, stats) + + if page.route_groups: + page, stats = self.routes.set_route_group_targets(page, stats) + + page_file.close() + + full_flow_id = f"{stats.agent_id}/flows/{page.flow.resource_id}" + full_page_id = f"{full_flow_id}/pages/{page.resource_id}" + stats.pages[page.flow.display_name].append(page.data) + stats.flow_page_map[ + page.flow.display_name]["pages"][page.display_name] = full_page_id + + return stats + + def process_pages_directory(self, flow: types.Flow, stats: types.AgentData): + """Process the Pages dir inside a specific Flow dir. + + Some Flows may not contain Pages, so we check for the existence + of the directory before traversing + """ + if "pages" in os.listdir(flow.dir_path): + page_paths = self.build_page_path_list(flow.dir_path) + + for page_path in page_paths: + page = types.Page(flow=flow) + page.agent_id = flow.agent_id + page.page_file = page_path + stats.total_pages += 1 + stats = self.process_page(page, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/route_groups.py b/src/dfcx_scrapi/agent_extract/route_groups.py new file mode 100644 index 00000000..3a73e50a --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/route_groups.py @@ -0,0 +1,100 @@ +"""Route Groups processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types +from dfcx_scrapi.agent_extract import routes + + +class RouteGroups: + """Route Groups processing methods and functions.""" + + def __init__(self): + self.special_pages = [ + "End Session", + "End Flow", + "Start Page", + "Current Page", + "Previous Page", + ] + + self.common = common.Common() + self.routes = routes.Fulfillments() + + @staticmethod + def build_route_group_path_list(flow_local_path: str): + """Builds a list of files, each representing a Route Group. 
+ + Ex: /path/to/agent/flows//transitionRouteGroups/ + """ + root_dir = flow_local_path + "/transitionRouteGroups" + + if "transitionRouteGroups" in os.listdir(flow_local_path): + rg_paths = [] + + for rg_file in os.listdir(root_dir): + rg_file_path = f"{root_dir}/{rg_file}" + rg_paths.append(rg_file_path) + + return rg_paths + + def process_route_group(self, rg: types.RouteGroup, stats: types.AgentData): + """Process a single Route Group.""" + rg.display_name = self.common.parse_filepath(rg.rg_file, "route_group") + rg.display_name = self.common.clean_display_name(rg.display_name) + + with open(rg.rg_file, "r", encoding="UTF-8") as route_group_file: + rg.data = json.load(route_group_file) + rg.resource_id = rg.data.get("name", None) + rg.display_name = rg.data.get("displayName", None) + rg.routes = rg.data.get("transitionRoutes", None) + + stats = self.routes.process_routes(rg, stats) + + route_group_file.close() + + full_flow_id = f"{stats.agent_id}/flows/{rg.flow.resource_id}" + full_rg_id = f"{full_flow_id}/transitionRouteGroups/{rg.resource_id}" + stats.route_groups_map[ + rg.flow.display_name]["route_groups"][rg.display_name] = full_rg_id + stats.route_groups[rg.flow.display_name].append(rg.data) + + return stats + + def process_route_groups_directory( + self, flow: types.Flow, stats: types.AgentData): + """Process Route Groups dir in the JSON Package structure.""" + if "transitionRouteGroups" in os.listdir(flow.dir_path): + # Create a list of all Route Group paths to iter through + rg_paths = self.build_route_group_path_list(flow.dir_path) + stats.total_route_groups += len(rg_paths) + + full_flow_id = f"{stats.agent_id}/flows/{flow.resource_id}" + stats.route_groups_map[flow.display_name] = { + "id": full_flow_id, + "route_groups": {} + } + stats.route_groups[flow.display_name] = [] + + for rg_path in rg_paths: + rg = types.RouteGroup(flow=flow) + rg.rg_file = rg_path + stats = self.process_route_group(rg, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/routes.py b/src/dfcx_scrapi/agent_extract/routes.py new file mode 100644 index 00000000..f91d61ed --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/routes.py @@ -0,0 +1,320 @@ +"""Fulfillment routes processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Any + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types + + +class Fulfillments: + """Fulfillment routes processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + self.route_parameters = {} + + @staticmethod + def check_for_webhook(page: types.Page, path: Dict[str, Any]): + """Check the current route for existence of webhook.""" + if "webhook" in path: + page.has_webhook = True + + @staticmethod + def check_for_webhook_event_handlers(route: types.Fulfillment): + """Check for Webhook Error Event Handler on Page. 
+ + In this method, we're interested in the following conditions: + - Page is currently flagged w/webhook = True + - Page HAS NOT been flagged w/having a webhook error handler + - The trigger MATCHES the pattern 'webhook.error' + + If a Page and its Route meet all the criteria, we'll flip the bit. + Otherwise, the webhook handler bit will remain False, causing a rule + flag.""" + + if all( + [ + route.page.has_webhook, + not route.page.has_webhook_event_handler, + "webhook.error" in route.trigger, + ] + ): + route.page.has_webhook_event_handler = True + + @staticmethod + def check_for_intent(route: types.Fulfillment): + """Check route data to see if Intent is present.""" + intent = None + if "intent" in route.data: + intent = route.data["intent"] + + return intent + + def process_intents_in_routes( + self, route: types.Fulfillment, stats: types.AgentData): + intent = self.check_for_intent(route) + if intent: + pair = (intent, route.page.display_name) + stats.active_intents[ + route.page.flow.display_name].append(pair) + + return stats + + def collect_transition_route_trigger(self, route): + """Inspect route and return all Intent/Condition info.""" + + trigger = [] + + if "intent" in route.data: + trigger.append("intent") + + if "condition" in route.data: + trigger.append("condition") + + if len(trigger) > 0: + trigger = "+".join(trigger) + + return trigger + + def get_trigger_info(self, route): + """Extract trigger info from route based on primary key.""" + + if route.fulfillment_type == "event": + trigger = f"event : {route.data.get('event', None)}" + + if route.fulfillment_type == "reprompt_handler": + trigger = f"{route.parameter} : event : "\ + f"{route.data.get('event', None)}" + + if route.fulfillment_type == "transition_route": + intent_condition = self.collect_transition_route_trigger(route) + trigger = f"route : {intent_condition}" + + return trigger + + def set_route_group_targets(self, page: types.Page, stats: types.AgentData): + """Determine Route Targets for Route Group routes.""" + current_page = page.display_name + + for route_group in page.route_groups: + page.flow.graph.add_edge(current_page, route_group) + page.flow.graph.add_used_node(route_group) + + stats.graph.add_edge(current_page, route_group) + stats.graph.add_used_node(route_group) + + return page, stats + + def set_route_targets( + self, route: types.Fulfillment, stats: types.AgentData): + """Determine the Route Targets for the specified route. + + Primary function is to build out the graph structure for the + Flow based on the current page and where the routes are pointing to. + The graph structure can then be traversed later to determine any errors + or inconsistencies in design. 
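+
+        For example (hypothetical page names), a route on "Collect Address"
+        that targets "Confirm Address" adds the edge
+        Collect Address -> Confirm Address, while a route that targets
+        another Flow adds an edge to a node labelled
+        "FLOW: <target flow display name>" so flow-level transitions can be
+        filtered out of the page-level reports later.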
+ """ + current_page = route.page.display_name + + route.target_flow = route.data.get("targetFlow", None) + route.target_page = route.data.get("targetPage", None) + + if route.target_page: + route.page.flow.graph.add_edge(current_page, route.target_page) + route.page.flow.graph.add_used_node(route.target_page) + + stats.graph.add_edge(current_page, route.target_page) + stats.graph.add_used_node(route.target_page) + + if route.target_flow: + route.page.flow.graph.add_edge( + current_page, f"FLOW: {route.target_flow}") + route.page.flow.graph.add_used_node(f"FLOW: {route.target_flow}") + + stats.graph.add_edge( + current_page, f"FLOW: {route.target_flow}" + ) + stats.graph.add_used_node(f"FLOW: {route.target_flow}") + + return route, stats + + def update_route_parameters( + self, route: types.Fulfillment, item: Dict[str, str]): + """Update the Route Parameters map based on new info.""" + flow_name = route.page.flow.display_name + page_name = route.page.display_name + + flow_data = self.route_parameters.get(flow_name, None) + page_data = None + + if flow_data: + page_data = flow_data.get(page_name, None) + + # Flow and Page already exists, append to existing list. + if page_data: + self.route_parameters[flow_name][page_name].append(item) + + # Flow data exists, but not Page, so only create the Page list. + elif flow_data and not page_data: + self.route_parameters[flow_name][page_name] = [item] + + # Neither the Flow or Page data exists, so create it all. + else: + self.route_parameters[flow_name] = {page_name: [item]} + + + def process_fulfillment_type( + self, stats: types.AgentData, route: types.Fulfillment, path: object, + key: str): + """Parse through specific fulfillment types.""" + fulfillment_data = path.get(key, None) + + if fulfillment_data: + for item in fulfillment_data: + # This is where each message type will exist + # text, custom payload, etc. + + if "text" in item: + for text in item["text"]["text"]: + route.text = text + + if "parameter" in item: + self.update_route_parameters(route, item) + + return stats + + def process_reprompt_handlers( + self, fp: types.FormParameter, stats: types.AgentData): + """Processing for Reprompt Event Handlers inside Form parameters. + + While Reprompt Event Handlers are technically Events, they differ from + standard Page level Events because they act on the FormParameter data + structure, not Fulfillment Route data structure as standard Events do. 
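+
+        For example, a required form parameter will typically define one or
+        more repromptEventHandlers (e.g. for no-match / no-input events);
+        each handler is processed here as its own Fulfillment with
+        fulfillment_type set to "reprompt_handler".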
+ """ + if not fp.reprompt_handlers: + return stats + + for handler in fp.reprompt_handlers: + route = types.Fulfillment(page=fp.page) + route.data = handler + route.agent_id = fp.page.agent_id + route.fulfillment_type = "reprompt_handler" + route.parameter = fp.display_name + route.trigger = self.get_trigger_info(route) + route, stats = self.set_route_targets(route, stats) + path = route.data.get("triggerFulfillment", None) + event = route.data.get("event", None) + + stats = self.process_intents_in_routes(route, stats) + + if not path and not event: + continue + + # Flag for Webhook Handler + self.check_for_webhook(fp.page, path) + + stats = self.process_fulfillment_type( + stats, route, path, "messages") + + return stats + + def process_events(self, page: types.Page, stats: types.AgentData): + """Parse through all Page Event Handlers.""" + if not page.events: + return stats + + for route_data in page.events: + route = types.Fulfillment(page=page) + route.data = route_data + route.agent_id = page.agent_id + route.fulfillment_type = "event" + route.trigger = self.get_trigger_info(route) + route, stats = self.set_route_targets(route, stats) + path = route.data.get("triggerFulfillment", None) + event = route.data.get("event", None) + + stats = self.process_intents_in_routes(route, stats) + + if not path and not event: + continue + + # Flag for Webhook Handler + self.check_for_webhook_event_handlers(route) + + stats = self.process_fulfillment_type( + stats, route, path, "messages") + + return stats + + def process_routes(self, page: types.Page, stats: types.AgentData): + """Parse through all Transition Routes.""" + tf_key = "triggerFulfillment" + + if not page.routes: + return stats + + for route_data in page.routes: + route = types.Fulfillment(page=page) + route.data = route_data + route.agent_id = page.agent_id + route.fulfillment_type = "transition_route" + route.trigger = self.get_trigger_info(route) + route, stats = self.set_route_targets(route, stats) + + stats = self.process_intents_in_routes(route, stats) + + path = route.data.get(tf_key, None) + + if not path: + continue + + # Flag for Webhook Handler + self.check_for_webhook(page, path) + + stats = self.process_fulfillment_type( + stats, route, path, "messages") + + # Preset Params processed here + stats = self.process_fulfillment_type( + stats, route, path, "setParameterActions" + ) + + return stats + + def process_entry(self, page: types.Page, stats: types.AgentData): + """Process Entry Fulfillment on a single page file. + + The Entry Fulfillment to a Page only has 1 "route" (i.e. itself) so + there is no need to loop through multiple routes, as they don't + exist for Entry Fulfillment. + """ + + if not page.entry: + return stats + + route = types.Fulfillment(page=page) + route.data = page.entry + route.agent_id = page.agent_id + route.fulfillment_type = "entry" + route.trigger = "entry" + path = route.data + + self.check_for_webhook(page, path) + + stats = self.process_fulfillment_type(stats, route, path, "messages") + + return stats diff --git a/src/dfcx_scrapi/agent_extract/test_cases.py b/src/dfcx_scrapi/agent_extract/test_cases.py new file mode 100644 index 00000000..ee5be205 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/test_cases.py @@ -0,0 +1,184 @@ +"""Test Case processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os + +from typing import Dict, List, Any + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types + + +class TestCases: + """Test Case processing methods and functions.""" + + def __init__(self): + self.common = common.Common() + + @staticmethod + def build_test_case_path_list(agent_local_path: str): + """Builds a list of files, each representing a test case.""" + root_dir = agent_local_path + "/testCases" + + test_case_paths = [] + + for test_case in os.listdir(root_dir): + end = test_case.split(".")[-1] + if end == "json": + test_case_path = f"{root_dir}/{test_case}" + test_case_paths.append(test_case_path) + + return test_case_paths + + @staticmethod + def get_test_case_intent_phrase_pair( + tc: types.TestCase) -> List[Dict[str, str]]: + """Parse Test Case and return a list of intents in use. + + This method will produce a List of Dicts where the contents of each + dict is the Training Phrase and associated Triggered Intent as listed + in the Test Case Conversation Turn. This information is used to compare + the User Input training phrase with the actual training phrases that + exist in the Intent resource. + + The dict format is as follows: + { + training_phrase: , + intent: + } + """ + intent_data = [] + + if tc.conversation_turns: + for turn in tc.conversation_turns: + user = turn["userInput"] + agent = turn["virtualAgentOutput"] + intent = agent.get("triggeredIntent", None) + phrase = user.get("input", None) + + text = phrase.get("text", None) + + if text: + text = text["text"] + + if intent and text: + intent_data.append( + { + "user_utterance": text, + "intent": intent["name"], + "status": "valid", + "training_phrases": [], + } + ) + + return intent_data + + @staticmethod + def get_test_case_intent_data(agent_local_path: str): + """Collect all Intent Files and Training Phrases for Test Case.""" + intents_path = agent_local_path + "/intents" + + intent_paths = [] + + for intent_dir in os.listdir(intents_path): + intent_dir_path = f"{intents_path}/{intent_dir}" + intent_paths.append( + {"intent": intent_dir, "file_path": intent_dir_path} + ) + + return intent_paths + + @staticmethod + def flatten_tp_data(tp_data: List[Any]): + """Flatten the Training Phrase proto to a list of strings.""" + cleaned_tps = [] + + for tp in tp_data["trainingPhrases"]: + parts_list = [part["text"].lower() for part in tp["parts"]] + cleaned_tps.append("".join(parts_list)) + + return cleaned_tps + + def gather_intent_tps(self, tc: types.TestCase): + """Collect all TPs associated with Intent data in Test Case.""" + tc.associated_intent_data = {} + + for i, pair in enumerate(tc.intent_data): + intent_dir = tc.agent_path + "/intents/" + pair["intent"] + + try: + if "trainingPhrases" in os.listdir(intent_dir): + training_phrases_path = intent_dir + "/trainingPhrases" + + for lang_file in os.listdir(training_phrases_path): + # lang_code = lang_file.split(".")[0] + lang_code_path = f"{training_phrases_path}/{lang_file}" + + with open( + lang_code_path, "r", encoding="UTF-8" + ) as tp_file: + tp_data = json.load(tp_file) + cleaned_tps = 
self.flatten_tp_data(tp_data) + + tp_file.close() + + tc.intent_data[i]["training_phrases"].extend( + cleaned_tps + ) + tc.associated_intent_data[pair["intent"]] = cleaned_tps + + except FileNotFoundError: + tc.intent_data[i]["status"] = "invalid_intent" + tc.has_invalid_intent = True + continue + + return tc + + def process_test_case(self, tc: types.TestCase, stats: types.AgentData): + """Process a single Test Case file.""" + + with open(tc.dir_path, "r", encoding="UTF-8") as tc_file: + tc.data = json.load(tc_file) + tc.resource_id = tc.data.get("name", None) + tc.display_name = tc.data.get("displayName", None) + tc.tags = tc.data.get("tags", None) + tc.conversation_turns = tc.data.get( + "testCaseConversationTurns", None + ) + tc.test_config = tc.data.get("testConfig", None) + + full_tc_id = f"{stats.agent_id}/testCases/{tc.resource_id}" + tc.data["name"] = full_tc_id + stats.test_cases.append(tc.data) + + tc_file.close() + + return stats + + def process_test_cases_directory( + self, agent_local_path: str, stats: types.AgentData): + """Processing the test cases dir in the JSON package structure.""" + test_case_paths = self.build_test_case_path_list(agent_local_path) + stats.total_test_cases = len(test_case_paths) + + for test_case in test_case_paths: + tc = types.TestCase() + tc.dir_path = test_case + tc.agent_path = agent_local_path + stats = self.process_test_case(tc, stats) + + return stats diff --git a/src/dfcx_scrapi/agent_extract/types.py b/src/dfcx_scrapi/agent_extract/types.py new file mode 100644 index 00000000..ac25c34d --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/types.py @@ -0,0 +1,212 @@ +"""Collection of Type Classes used for offline processing.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
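+
+# Rough wiring sketch (illustrative only): the dataclasses below are plain
+# containers that the processing modules fill in. Assuming an agent export
+# has already been unzipped to `local_path`, the flow looks roughly like:
+#
+#   stats = AgentData()
+#   stats.graph = graph_class.Graph()
+#   stats.agent_id = "projects/<project>/locations/<region>/agents/<agent>"
+#   stats = flows.Flows().process_flows_directory(local_path, stats)
+#   stats = intents.Intents().process_intents_directory(local_path, stats)
+#   print(stats.total_flows, stats.total_intents)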
+ +from typing import Dict, List, Any, Tuple +from dataclasses import dataclass, field + +from dfcx_scrapi.agent_extract import graph as graph_class + +@dataclass +class AgentMetadata: + """Used to track the current Agent Metadata attrinbutes.""" + + default_language_code: str = None + dtmf_settings: bool = False + logging_enabled: bool = False + speech_adaptation: bool = False + + +@dataclass +class Flow: + """Used to track current Flow Attributes.""" + agent_id: str = None + all_pages: set = field(default_factory=set) + active_pages: set = field(default_factory=set) + data: Dict[str, Any] = field(default_factory=dict) + dangling_pages: set = field(default_factory=set) + dir_path: str = None # Full Directory Path for this Flow + display_name: str = None # Flow Display Name (removed special chars) + file_name: str = None # Original Name of Flow (includes special chars) + graph: graph_class.Graph = None + resource_id: str = None + resource_type: str = "flow" + start_page_file: str = None # File Path Location of START_PAGE + unreachable_pages: set = field(default_factory=set) + unused_pages: set = field(default_factory=set) + +@dataclass +class Page: + """Used to track current Page Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + display_name: str = None + entry: Dict[str, Any] = None + events: List[object] = None + flow: Flow = None + form: Dict[str, Any] = None + has_webhook: bool = False + has_webhook_event_handler: bool = False + page_file: str = None + resource_id: str = None + resource_type: str = "page" + routes: List[object] = None + route_groups: List[str] = None + +@dataclass +class FormParameter: + """Tracks Form Paramter attributes within a Page.""" + + advanced_settings: str = None + agent_id: str = None + data: Dict[str, Any] = None + display_name: str = None + dtmf_settings: str = None + entity_type: str = None + fill_behavior: Dict[str, Any] = None + init_fulfillment: Dict[str, Any] = None + page: Page = None + reprompt_handlers: Dict[str, Any] = None + required: bool = True + + +@dataclass +class RouteGroup: + """Used to track current RouteGroup Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + display_name: str = None + flow: Flow = None + resource_id: str = None + resource_type: str = "route_group" + rg_file: str = None + routes: List[object] = None + +@dataclass +class Fulfillment: + """Used to track current Fulfillment Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + display_name: str = None # Inherit from Page easy logging + fulfillment_type: str = None # transition_route | event + page: Page = None + parameter: str = None # Used for Reprompt Event Handlers + target_flow: str = None + target_page: str = None + text: str = None + trigger: str = None + resource_type: str = "fulfillment" + +@dataclass +class Intent: + """Used to track current Intent Attributes.""" + + agent_id: str = None + data: Dict[str, Any] = None + description: str = None + display_name: str = None + dir_path: str = None + labels: Dict[str, str] = None + metadata_file: str = None + parameters: List[Dict[str, str]] = field(default_factory=list) + resource_id: str = None + resource_type: str = "intent" + training_phrases: Dict[str, Any] = field(default_factory=dict) + +@dataclass +class EntityType: + """Used to track current Flow Attributes.""" + + agent_id: str = None + auto_expansion: str = None + data: Dict[str, Any] = None + dir_path: str = None # Full Directory Path for this Entity Type + display_name: str = None # Entity Type 
Display Name + entities: Dict[str, Any] = field(default_factory=dict) # Map + excluded_phrases: Dict[str, Any] = field(default_factory=dict) # Map + fuzzy_extraction: bool = False + kind: str = None # The kind of Entity Type represented + resource_id: str = None + resource_type: str = "entity_type" + +@dataclass +class TestCase: + """Used to track current Test Case Attributes.""" + + associated_intent_data: Dict[str, Any] = None + agent_id: str = None + agent_path: str = None + conversation_turns: List[Any] = None + data: Dict[str, Any] = None + dir_path: str = None + display_name: str = None + has_invalid_intent: bool = False + intent_data: List[str] = None + qualified: bool = False + resource_id: str = None + resource_type: str = "test_case" + tags: List[str] = None + test_config: Dict[str, Any] = None + +@dataclass +class Webhook: + """Used to track current Webhook attributes.""" + + agent_id: str = None + agent_path: str = None + data: Dict[str, Any] = None + dir_path: str = None + display_name: str = None + resource_id: str = None + resource_type: str = "webhook" + service_type: str = None + timeout: int = 0 + +@dataclass +class AgentData: + """Used to track agent data for each section processed.""" + active_intents: Dict[str, List[Tuple[str, str]]] = field( + default_factory=dict) + active_pages: Dict[str, set] = field(default_factory=dict) + agent_id: str = None + entity_types: List[Dict[str, Any]] = field(default_factory=list) + entity_types_map: Dict[str, Any] = field(default_factory=dict) + flow_page_map: Dict[str, Any] = field(default_factory=dict) + flows: List[Dict[str, Any]] = field(default_factory=list) + flows_map: Dict[str, Any] = field(default_factory=dict) + graph: graph_class.Graph = None + intents: List[Dict[str, Any]] = field(default_factory=list) + intents_map: Dict[str, Any] = field(default_factory=dict) + lang_code: str = "en" + pages: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + route_groups: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + route_groups_map: Dict[str, Any] = field(default_factory=dict) + test_cases: List[Dict[str, Any]] = field(default_factory=list) + unreachable_pages: Dict[str, set] = field(default_factory=dict) + unused_pages: Dict[str, set] = field(default_factory=dict) + webhooks: List[Dict[str, Any]] = field(default_factory=list) + webhooks_map: Dict[str, Any] = field(default_factory=dict) + + total_flows: int = 0 + total_pages: int = 0 + total_intents: int = 0 + total_training_phrases: int = 0 + total_entity_types: int = 0 + total_route_groups: int = 0 + total_test_cases: int = 0 + total_webhooks: int = 0 diff --git a/src/dfcx_scrapi/agent_extract/webhooks.py b/src/dfcx_scrapi/agent_extract/webhooks.py new file mode 100644 index 00000000..38467080 --- /dev/null +++ b/src/dfcx_scrapi/agent_extract/webhooks.py @@ -0,0 +1,95 @@ +"""Webhook processing methods and functions.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
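+
+# Illustrative sketch (placeholder IDs and paths): collecting webhook stats
+# from a locally unzipped agent export.
+#
+#   stats = types.AgentData()
+#   stats.agent_id = "projects/<project>/locations/<region>/agents/<agent>"
+#   stats = Webhooks().process_webhooks_directory("/tmp/my_agent", stats)
+#   print(stats.total_webhooks, stats.webhooks_map)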
+ +import json +import os + +from dfcx_scrapi.agent_extract import common +from dfcx_scrapi.agent_extract import types + +class Webhooks: + """Webhook linter methods and functions.""" + + def __init__(self): + self.common = common.Common() + + @staticmethod + def build_webhook_path_list(agent_local_path: str): + """Builds a list of webhook file locations.""" + root_dir = agent_local_path + "/webhooks" + + webhook_paths = [] + + for webhook_file in os.listdir(root_dir): + webhook_file_path = f"{root_dir}/{webhook_file}" + webhook_paths.append(webhook_file_path) + + return webhook_paths + + @staticmethod + def get_service_type(webhook: types.Webhook) -> str: + """Get the type of Webhook Service that is cofigured.""" + if "genericWebService" in webhook.data: + webhook.service_type = "Generic Web Service" + + else: + webhook.service_type = "Other" + + return webhook.service_type + + def process_webhook(self, webhook: types.Webhook, stats: types.AgentData + ) -> types.AgentData: + """Process a single Webhook file.""" + + with open(webhook.dir_path, "r", encoding="UTF-8") as webhook_file: + webhook.data = json.load(webhook_file) + webhook.resource_id = webhook.data.get("name", None) + webhook.display_name = webhook.data.get("displayName", None) + webhook.service_type = self.get_service_type(webhook) + + timeout_dict = webhook.data.get("timeout", None) + if timeout_dict: + webhook.timeout = timeout_dict.get("seconds", None) + + webhook_file.close() + + full_webhook_id = f"{stats.agent_id}/webhooks/{webhook.resource_id}" + webhook.data["name"] = full_webhook_id + stats.webhooks.append(webhook.data) + stats.total_webhooks += 1 + + return stats + + def process_webhooks_directory(self, agent_local_path: str, + stats: types.AgentData) -> types.AgentData: + """Processing the top level Webhooks Dir in the JSON Package structure. + + The following files exist under the `webhooks` dir: + - .json + """ + # Create a list of all Webhook paths to iter through + webhook_paths = self.build_webhook_path_list(agent_local_path) + + for webhook_path in webhook_paths: + webhook = types.Webhook() + webhook.dir_path = webhook_path + + stats = self.process_webhook(webhook, stats) + + full_webhook_id = f"{stats.agent_id}/webhooks/{webhook.resource_id}" + stats.webhooks_map[webhook.display_name] = full_webhook_id + + return stats diff --git a/src/dfcx_scrapi/builders/intents.py b/src/dfcx_scrapi/builders/intents.py index 8e1d4739..d7a360a8 100644 --- a/src/dfcx_scrapi/builders/intents.py +++ b/src/dfcx_scrapi/builders/intents.py @@ -62,6 +62,7 @@ def _include_spaces_to_phrase(self, phrase: List[str], annots: List[str]): A list of strings that represents parameter_id of each part in phrase. 
""" + chars_to_ignore_at_beginning = ["'", ",", ".", "?", "!"] i = 0 while True: p_curr, a_curr = phrase[i], annots[i] @@ -75,7 +76,13 @@ def _include_spaces_to_phrase(self, phrase: List[str], annots: List[str]): annots.insert(i+1, "") i += 2 elif a_curr and not a_next: - phrase[i+1] = " " + p_next + flag = any( + ch + for ch in chars_to_ignore_at_beginning + if p_next.startswith(ch) + ) + if not flag: + phrase[i+1] = " " + p_next i += 1 elif not a_curr and a_next: phrase[i] = p_curr + " " diff --git a/src/dfcx_scrapi/core/agents.py b/src/dfcx_scrapi/core/agents.py index 992c99dd..7e441528 100644 --- a/src/dfcx_scrapi/core/agents.py +++ b/src/dfcx_scrapi/core/agents.py @@ -53,8 +53,18 @@ def __init__( self.agent_id = agent_id self.client_options = self._set_region(agent_id) - def _build_list_agents_client_request(self, location_id): - """Builds the List Agents Request object.""" + @scrapi_base.api_call_counter_decorator + def _list_agents_client_request(self, location_id) -> List[ + types.agent.Agent + ]: + """Builds the List Agents Request object. + + Args: + location_id: The GCP Location ID in the following format: + `projects//locations/` + + Returns: + List of types.agent.Agent""" request = types.agent.ListAgentsRequest() request.parent = location_id @@ -64,31 +74,34 @@ def _build_list_agents_client_request(self, location_id): credentials=self.creds, client_options=client_options ) - return client, request + response = client.list_agents(request) + + agents = [] + for page in response.pages: + for agent in page.agents: + agents.append(agent) + + return agents def list_agents( self, - location_id: str = None, - project_id: str = None) -> List[types.Agent]: + project_id: str, + location_id: str = None) -> List[types.Agent]: """Get list of all CX agents in a given GCP Region or Project. - This method allows you to provide a specific Location ID consisting of - a GCP Project ID and Location ID (i.e. GCP Region Name) to retrieve all - of the CX agents associated with that Project/Region. Optionally, you - can provide just the Project ID and the funciton will traverse ALL - available GCP regions to list ALL agents across the regions. + This method allows you to provide a GCP Project ID to retrieve all of + the CX agents across ALL available GCP region. If the optional location + ID is provided, the method will only pull the agents for that region. Args: - location_id: The GCP Project/Location ID in the following format - `projects//locations/` - `projects/my-gcp-project/locations/us-central1` - project_id: The GCP Project ID as a string + project_id: The GCP Project ID. Ex: `my-cool-gcp-project` + location_id: The GCP Location ID. Ex: `global`, `us-central1`, etc. 
Returns: List of Agent objects """ - if project_id: + if not location_id: region_list = [ "global", "us-central1", @@ -105,26 +118,15 @@ def list_agents( agents = [] for region in region_list: location_path = f"projects/{project_id}/locations/{region}" - client, request = self._build_list_agents_client_request( - location_path - ) - - agents += self.list_agents(location_id=location_path) + agents += self._list_agents_client_request(location_path) else: - client, request = self._build_list_agents_client_request( - location_id - ) - - response = client.list_agents(request) - - agents = [] - for page in response.pages: - for agent in page.agents: - agents.append(agent) + location_path = f"projects/{project_id}/locations/{location_id}" + agents = self._list_agents_client_request(location_path) return agents + @scrapi_base.api_call_counter_decorator def get_agent(self, agent_id: str) -> types.Agent: """Retrieves a single CX Agent resource object. @@ -178,14 +180,12 @@ def get_agent_by_display_name( """ if location_id: - agent_list = self.list_agents( - location_id=location_id - ) + agent_list = self._list_agents_client_request(location_id) elif region: - agent_list = self.list_agents( - location_id=f"projects/{project_id}/locations/{region}" - ) + agent_list = self._list_agents_client_request( + f"projects/{project_id}/locations/{region}" + ) else: agent_list = self.list_agents(project_id=project_id) @@ -216,6 +216,7 @@ def get_agent_by_display_name( return matched_agent + @scrapi_base.api_call_counter_decorator def create_agent( self, project_id: str, @@ -273,9 +274,11 @@ def create_agent( return response + @scrapi_base.api_call_counter_decorator def validate_agent( self, - agent_id: str = None, + agent_id: str, + language_code: str = "en", timeout: float = None) -> Dict: """Initiates the Validation of the CX Agent or Flow. @@ -296,6 +299,7 @@ def validate_agent( request = types.agent.ValidateAgentRequest() request.name = agent_id + request.language_code = language_code client_options = self._set_region(agent_id) client = services.agents.AgentsClient( @@ -309,6 +313,7 @@ def validate_agent( return val_dict + @scrapi_base.api_call_counter_decorator def get_validation_result( self, agent_id: str = None, @@ -354,11 +359,16 @@ def get_validation_result( return val_results_dict + @scrapi_base.api_call_counter_decorator def export_agent( self, agent_id: str, gcs_bucket_uri: str, - environment_display_name: str = None + environment_display_name: str = None, + data_format: str = "BLOB", + git_branch: str = None, + git_commit_message: str = None, + include_bq_export_settings: bool = False ) -> str: """Exports the specified CX agent to Google Cloud Storage bucket. @@ -368,17 +378,39 @@ def export_agent( gcs_bucket_uri: The Google Cloud Storage bucket/filepath to export the agent to in the following format: `gs:///` - environment_display_name: (Optional) CX Agent environment display name + environment_display_name: CX Agent environment display name as string. If not set, DRAFT environment is assumed. + data_format: Optional. The data format of the exported agent. If not + specified, ``BLOB`` is assumed. + git_branch: Optional. The Git branch to commit the exported agent to. + git_commit_message: Optional. The Git Commit message to send. Only + applicable if using `git_branch` arg. + include_bigquery_export_settings: Will exclude or included the BQ + settings on export. 
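+
+        Example (illustrative values only):
+          lro = agents_client.export_agent(
+              agent_id="projects/<project>/locations/global/agents/<agent>",
+              gcs_bucket_uri="gs://my-bucket/my_agent_export",
+              data_format="JSON_PACKAGE",
+              git_branch="main",
+              git_commit_message="Export agent to JSON package")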
Returns: A Long Running Operation (LRO) ID that can be used to check the status of the export using dfcx_scrapi.core.operations->get_lro() """ + blob_format = types.agent.ExportAgentRequest.DataFormat(1) + json_format = types.agent.ExportAgentRequest.DataFormat(4) + request = types.agent.ExportAgentRequest() request.name = agent_id request.agent_uri = gcs_bucket_uri + request.include_bigquery_export_settings = include_bq_export_settings + + if data_format in ["JSON", "ZIP", "JSON_PACKAGE"]: + request.data_format = json_format + else: + request.data_format = blob_format + + if git_branch: + git_settings = types.agent.ExportAgentRequest.GitDestination() + git_settings.tracking_branch = git_branch + git_settings.commit_message = git_commit_message + request.git_destination = git_settings if environment_display_name: self._environments = environments.Environments(creds=self.creds) @@ -402,6 +434,7 @@ def export_agent( return response.operation.name + @scrapi_base.api_call_counter_decorator def restore_agent(self, agent_id: str, gcs_bucket_uri: str) -> str: """Restores a CX agent from a gcs_bucket location. @@ -411,8 +444,8 @@ def restore_agent(self, agent_id: str, gcs_bucket_uri: str) -> str: Args: agent_id: CX Agent ID string in the following format projects//locations//agents/ - gcs_bucket_uri: The Google Cloud Storage bucket/filepath to export the - agent to in the following format: + gcs_bucket_uri: The Google Cloud Storage bucket/filepath to restore + the agent from in the following format: `gs:///` Returns: @@ -433,6 +466,7 @@ def restore_agent(self, agent_id: str, gcs_bucket_uri: str) -> str: return response.operation.name + @scrapi_base.api_call_counter_decorator def update_agent( self, agent_id: str, obj: types.Agent = None, **kwargs ) -> types.Agent: @@ -470,6 +504,7 @@ def update_agent( return response + @scrapi_base.api_call_counter_decorator def delete_agent(self, agent_id: str) -> str: """Deletes the specified Dialogflow CX Agent. diff --git a/src/dfcx_scrapi/core/changelogs.py b/src/dfcx_scrapi/core/changelogs.py index 21563268..db7ccf55 100644 --- a/src/dfcx_scrapi/core/changelogs.py +++ b/src/dfcx_scrapi/core/changelogs.py @@ -23,7 +23,7 @@ from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -33,7 +33,7 @@ ) -class Changelogs(ScrapiBase): +class Changelogs(scrapi_base.ScrapiBase): """Tools class that contains methods to support Change History feature.""" def __init__( @@ -76,6 +76,7 @@ def _validate_epoch_time(create_time_epoch_seconds: str): else: return True + @scrapi_base.api_call_counter_decorator def list_changelogs(self, agent_id: str = None, **kwargs): """Lists all Change History logs for a CX Agent. @@ -147,6 +148,7 @@ def list_changelogs(self, agent_id: str = None, **kwargs): return changelogs + @scrapi_base.api_call_counter_decorator def get_changelog(self, changelog_id: str): """Get a single changelog resource object. 
@@ -223,7 +225,7 @@ def changelogs_to_dataframe( ], ) - df = df.append(log_data) + df = pd.concat([df, log_data], ignore_index=True) df = df.reset_index(drop=True) diff --git a/src/dfcx_scrapi/core/conversation.py b/src/dfcx_scrapi/core/conversation.py index 8e23ec87..ce354e7b 100644 --- a/src/dfcx_scrapi/core/conversation.py +++ b/src/dfcx_scrapi/core/conversation.py @@ -19,7 +19,7 @@ import traceback import uuid -from typing import Dict +from typing import Dict, Any from operator import attrgetter from threading import Thread @@ -64,15 +64,12 @@ def __init__( agent_id=agent_id, ) - logging.info( + logging.debug( "create conversation with creds_path: %s | agent_id: %s", creds_path, agent_id) - if agent_id or config["agent_path"]: - self.agent_id = agent_id or config["agent_path"] - - self.language_code = language_code or config["language_code"] - + self.agent_id = self._set_agent_id(agent_id, config) + self.language_code = self._set_language_code(language_code, config) self.start_time = None self.query_result = None self.session_id = None @@ -82,6 +79,46 @@ def __init__( self.flows = flows.Flows(creds=self.creds) self.pages = pages.Pages(creds=self.creds) + @staticmethod + def _set_language_code(language_code: str, config: Dict[str, Any]) -> str: + """Determines how to set the language_code based on user inputs. + + We implement this for backwards compatability. + """ + # Config will take precedence if provided + if config: + config_lang_code = config.get("language_code", None) + + # We'll only return if it exist in the config on the off chance that + # some users have provided the langauge_code as a top level arg in + # addition to providing the config + if config_lang_code: + return config_lang_code + + return language_code + + @staticmethod + def _set_agent_id(input_agent_id: str, config: Dict[str, Any]) -> str: + """Determines how to set the agent_id based on user inputs. + + We implement this for backwards compatability. 
+ """ + + # Config will take precedence if provided + if config: + config_agent_path = config.get("agent_path", None) + + # We'll only return if it exist in the config on the off chance that + # some users have provided the agent_id as a top level arg in + # addition to providing the config + if config_agent_path: + return config_agent_path + + elif input_agent_id: + return input_agent_id + + return None + @staticmethod def _get_match_type_from_map(match_type: int): """Translates the match_type enum int value into a more descriptive @@ -95,7 +132,8 @@ def _get_match_type_from_map(match_type: int): 4: "NO_MATCH", 5: "NO_INPUT", 6: "EVENT", - 8: "KNOWLEDGE_CONNECTOR" + 8: "KNOWLEDGE_CONNECTOR", + 9: "LLM" } return match_type_map[match_type] @@ -122,10 +160,9 @@ def progress_bar(current, total, bar_length=50, type_="Progress"): percent = float(current) * 100 / total arrow = "-" * int(percent / 100 * bar_length - 1) + ">" spaces = " " * (bar_length - len(arrow)) - print( - f"{type_}({current}/{total})" + f"[{arrow}{spaces}] {percent:.2f}%", - end="\r", - ) + logging.info( + f"{type_}({current}/{total})" + f"[{arrow}{spaces}] {percent:.2f}%" + ) @staticmethod def _build_query_params_object(parameters, current_page, disable_webhook): @@ -257,19 +294,6 @@ def _page_id_mapper(self): flow_mapped["page_id"] = flow_mapped.index flow_mapped = flow_mapped.rename(columns={0: "page_display_name"}) - - # add start page - start_page_id = flow_id + "/pages/START_PAGE" - flow_mapped = pd.concat( - [ - flow_mapped, - pd.DataFrame( - columns=["page_display_name", "page_id"], - data=[["START_PAGE", start_page_id]], - ), - ] - ) - flow_mapped.insert(0, "flow_display_name", flow_map[flow_id]) agent_pages_map = pd.concat([agent_pages_map, flow_mapped]) @@ -347,7 +371,7 @@ def set_agent_env(self, param, value): self.agent_env[param] = value def checkpoint(self, msg=None, start=False): - """Print a checkpoint to time progress and debug bottleneck""" + """Log a checkpoint to time progress and debug bottleneck""" if start: start_time = time.perf_counter() self.start_time = start_time @@ -356,8 +380,9 @@ def checkpoint(self, msg=None, start=False): duration = round((time.perf_counter() - start_time), 2) if duration > 2: if msg: - print(f"{duration:0.2f}s {msg}") + logging.info(f"{duration:0.2f}s {msg}") + @scrapi_base.api_call_counter_decorator def reply( self, send_obj: Dict[str, str], @@ -385,7 +410,7 @@ def reply( Returns: A dictionary for the agent reply to to the submitted text. Includes keys response_messages, confidence, page_name, - intent_name, match_type, match, other_intents, and params. + intent_name, match_type, match, and params. 
""" text = send_obj.get("text") send_params = send_obj.get("params") @@ -505,35 +530,12 @@ def reply( query_result.match.match_type ) reply["match"] = query_result.match - reply["other_intents"] = self.format_other_intents(query_result) reply["params"] = params logging.debug("reply %s", reply) return reply - def format_other_intents(self, query_result): - """Unwind protobufs into more friendly dict""" - other_intents = query_result.diagnostic_info.get( - "Alternative Matched Intents" - ) - items = [] - rank = 0 - for alt in other_intents: - items.append( - { - "name": alt.get("DisplayName"), - "score": alt.get("Score"), - "rank": rank, - } - ) - rank += 1 - - if self: - return items - - return None - def getpath(self, obj, xpath, default=None): """Get data at a pathed location out of object internals""" elem = obj diff --git a/src/dfcx_scrapi/core/entity_types.py b/src/dfcx_scrapi/core/entity_types.py index d90ba722..526d840c 100644 --- a/src/dfcx_scrapi/core/entity_types.py +++ b/src/dfcx_scrapi/core/entity_types.py @@ -22,7 +22,7 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -32,7 +32,7 @@ ) -class EntityTypes(ScrapiBase): +class EntityTypes(scrapi_base.ScrapiBase): """Core Class for CX Entity Type Resource functions.""" def __init__( @@ -43,6 +43,7 @@ def __init__( scope=False, entity_id: str = None, agent_id: str = None, + language_code: str = "en" ): super().__init__( creds_path=creds_path, @@ -53,6 +54,7 @@ def __init__( self.entity_id = entity_id self.agent_id = agent_id + self.language_code = language_code @staticmethod @@ -243,11 +245,13 @@ def get_entities_map(self, agent_id: str = None, reverse=False): return entities_dict - def list_entity_types(self, agent_id: str = None): + @scrapi_base.api_call_counter_decorator + def list_entity_types(self, agent_id: str, language_code: str = "en"): """Returns a list of Entity Type objects. Args: agent_id: the formatted CX Agent ID to use + language_code: Specifies the language of the Entity Types listed Returns: List of Entity Type objects @@ -257,6 +261,7 @@ def list_entity_types(self, agent_id: str = None): request = types.entity_type.ListEntityTypesRequest() request.parent = agent_id + request.language_code = language_code client_options = self._set_region(agent_id) client = services.entity_types.EntityTypesClient( @@ -272,11 +277,13 @@ def list_entity_types(self, agent_id: str = None): return entities - def get_entity_type(self, entity_id: str = None): + @scrapi_base.api_call_counter_decorator + def get_entity_type(self, entity_id: str = None, language_code: str = "en"): """Returns a single Entity Type object. 
Args: entity_id: the formatted CX Entity ID to get + language_code: Specifies the language of the Entity Types listed Returns: The single Entity Type object @@ -288,10 +295,15 @@ def get_entity_type(self, entity_id: str = None): client = services.entity_types.EntityTypesClient( credentials=self.creds, client_options=client_options ) - response = client.get_entity_type(name=entity_id) + request = types.entity_type.GetEntityTypeRequest() + request.name = entity_id + request.language_code = language_code + + response = client.get_entity_type(request=request) return response + @scrapi_base.api_call_counter_decorator def create_entity_type( self, agent_id: str = None, @@ -353,6 +365,7 @@ def create_entity_type( return response + @scrapi_base.api_call_counter_decorator def update_entity_type( self, entity_type_id: str = None, @@ -407,8 +420,9 @@ def update_entity_type( return response + @scrapi_base.api_call_counter_decorator def delete_entity_type(self, entity_id: str = None, obj=None) -> None: - """Deletes a single Entity Type resouce object. + """Deletes a single Entity Type resource object. Args: entity_id: the formatted CX Entity ID to delete diff --git a/src/dfcx_scrapi/core/environments.py b/src/dfcx_scrapi/core/environments.py index fe924aa2..6e516cb2 100644 --- a/src/dfcx_scrapi/core/environments.py +++ b/src/dfcx_scrapi/core/environments.py @@ -38,7 +38,7 @@ class Environments(scrapi_base.ScrapiBase): def __init__( self, creds_path: str = None, - creds_dict: Dict[str,str] = None, + creds_dict: Dict[str, str] = None, creds: service_account.Credentials = None, agent_id: str = None, ): @@ -129,7 +129,8 @@ def get_environments_map( return environments_dict - def list_environments(self, agent_id:str=None): + @scrapi_base.api_call_counter_decorator + def list_environments(self, agent_id: str = None): """List all Versions for a given Flow""" if not agent_id: @@ -152,9 +153,10 @@ def list_environments(self, agent_id:str=None): return environments + @scrapi_base.api_call_counter_decorator def get_environment( self, - environment_id:str) -> types.environment.Environment: + environment_id: str) -> types.environment.Environment: """Get Environment object for specified environment ID. Args: @@ -178,8 +180,8 @@ def get_environment( def get_environment_by_display_name( self, - display_name:str, - agent_id:str) -> types.environment.Environment: + display_name: str, + agent_id: str) -> types.environment.Environment: """Get Environment object for specific environment by its display name. Args: @@ -201,10 +203,11 @@ def get_environment_by_display_name( return result + @scrapi_base.api_call_counter_decorator def create_environment( self, - environment:types.environment.Environment, - agent_id:str=None): + environment: types.environment.Environment, + agent_id: str = None): """Create a new environment for a specified agent. Args: environment: The environment to create. @@ -235,10 +238,10 @@ def create_environment( def create_environment_by_display_name( self, - display_name:str, - version_configs:List[Tuple[str,str]], - description:str=None, - agent_id:str=None): + display_name: str, + version_configs: List[Tuple[str, str]], + description: str = None, + agent_id: str = None): """Create a new environment for a specified agent. 
Args: display_name: The display name of the Environment to create @@ -286,10 +289,11 @@ def create_environment_by_display_name( return response + @scrapi_base.api_call_counter_decorator def update_environment( self, environment_id: str, - environment_obj:types.Environment = None, + environment_obj: types.Environment = None, **kwargs): """Update an existing environment for a specified agent. @@ -330,7 +334,8 @@ def update_environment( return response - def delete_environment(self, environment_id:str): + @scrapi_base.api_call_counter_decorator + def delete_environment(self, environment_id: str): """Delete a specified environment. Args: @@ -350,10 +355,11 @@ def delete_environment(self, environment_id:str): client.delete_environment(request) + @scrapi_base.api_call_counter_decorator def deploy_flow_to_environment( self, - environment_id:str, - flow_version:str): + environment_id: str, + flow_version: str): """Deploys a flow to the specified environment. Args: @@ -382,9 +388,10 @@ def deploy_flow_to_environment( return response + @scrapi_base.api_call_counter_decorator def lookup_environment_history( self, - environment_id:str) -> List[types.Environment]: + environment_id: str) -> List[types.Environment]: """Looks up the history of the specified environment. Args: @@ -413,7 +420,8 @@ def lookup_environment_history( return history - def list_continuous_test_results(self, environment_id:str): + @scrapi_base.api_call_counter_decorator + def list_continuous_test_results(self, environment_id: str): """Fetches a list of continuous test results for a given environment. Args: diff --git a/src/dfcx_scrapi/core/experiments.py b/src/dfcx_scrapi/core/experiments.py index ca0655c2..eac0c84e 100644 --- a/src/dfcx_scrapi/core/experiments.py +++ b/src/dfcx_scrapi/core/experiments.py @@ -19,7 +19,7 @@ from typing import Dict from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -34,7 +34,7 @@ ] -class ScrapiExperiments(ScrapiBase): +class ScrapiExperiments(scrapi_base.ScrapiBase): """Wrapper for working with Experiments""" def __init__( @@ -55,7 +55,8 @@ def __init__( logging.info("created %s", self.agent_id) - def list_experiments(self, environment_id=None): + @scrapi_base.api_call_counter_decorator + def list_experiments(self, environment_id: str = None): """List out experiments. Args: @@ -75,7 +76,7 @@ def list_experiments(self, environment_id=None): client_options=client_options, credentials=self.creds ) response = client.list_experiments(request) - blob = ScrapiBase.cx_object_to_json(response) + blob = scrapi_base.ScrapiBase.cx_object_to_json(response) if len(blob) < 1: logging.warning( diff --git a/src/dfcx_scrapi/core/flows.py b/src/dfcx_scrapi/core/flows.py index 3646898c..da236136 100644 --- a/src/dfcx_scrapi/core/flows.py +++ b/src/dfcx_scrapi/core/flows.py @@ -15,11 +15,13 @@ # limitations under the License. 
import logging +import time from typing import Dict, List from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.core import pages # logging config logging.basicConfig( @@ -54,6 +56,7 @@ def __init__( self.flow_id = flow_id self.agent_id = agent_id + self.pages = pages.Pages(creds=self.creds) # TODO: Migrate to Flow Builder class when ready @staticmethod @@ -129,6 +132,39 @@ def get_flows_map(self, agent_id: str, reverse=False): return flows_dict + def get_flow_page_map( + self, agent_id: str, rate_limit: float = 1.0 + ) -> Dict[str, Dict[str, str]]: + """Exports a user friendly dict containing Flows, Pages, and IDs + This method builds on top of `get_flows_map` and builds out a nested + dictionary containing all of the Page Display Names and UUIDs contained + within each Flow. Output Format: + { + : { + 'id': + 'pages': { : } + } + } + + Args: + agent_id: the formatted CX Agent ID to use + + Returns: + Dictionary containing Flow Names/UUIDs and Page Names/UUIDs + """ + flow_page_map = {} + + flows_map = self.get_flows_map(agent_id, reverse=True) + + for flow in flows_map: + pages_map = self.pages.get_pages_map( + flows_map[flow], reverse=True) + flow_page_map[flow] = {"id": flows_map[flow], "pages": pages_map} + time.sleep(rate_limit) + + return flow_page_map + + @scrapi_base.api_call_counter_decorator def train_flow(self, flow_id: str) -> str: """Trains the specified flow. @@ -155,6 +191,7 @@ def train_flow(self, flow_id: str) -> str: return response + @scrapi_base.api_call_counter_decorator def list_flows(self, agent_id: str) -> List[types.Flow]: """Get a List of all Flows in the current Agent. @@ -208,6 +245,7 @@ def get_flow_by_display_name( return flow + @scrapi_base.api_call_counter_decorator def get_flow(self, flow_id: str) -> types.Flow: """Get a single CX Flow object. @@ -226,6 +264,7 @@ def get_flow(self, flow_id: str) -> types.Flow: return response + @scrapi_base.api_call_counter_decorator def create_flow( self, agent_id: str, @@ -274,6 +313,7 @@ def create_flow( return response + @scrapi_base.api_call_counter_decorator def update_flow( self, flow_id: str, obj: types.Flow = None, **kwargs ) -> types.Flow: @@ -325,6 +365,7 @@ def update_nlu_settings(self, flow_id: str, **kwargs): setattr(current_settings, key, value) self.update_flow(flow_id=flow_id, nlu_settings=current_settings) + @scrapi_base.api_call_counter_decorator def export_flow( self, flow_id: str, gcs_path: str, ref_flows: bool = True ) -> Dict[str, str]: @@ -355,6 +396,7 @@ def export_flow( return response.result() + @scrapi_base.api_call_counter_decorator def export_flow_inline(self, flow_id: str, ref_flows: bool = True) -> bytes: """Export a Flow, returning uncompressed raw byte content for flow. @@ -377,6 +419,7 @@ def export_flow_inline(self, flow_id: str, ref_flows: bool = True) -> bytes: return (response.result()).flow_content + @scrapi_base.api_call_counter_decorator def import_flow( self, agent_id: str, @@ -425,6 +468,7 @@ def import_flow( return response + @scrapi_base.api_call_counter_decorator def delete_flow(self, flow_id: str, force: bool = False): """Deletes a single CX Flow Object resource. 
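A small sketch of the new get_flow_page_map helper; the agent ID is a placeholder and rate_limit is left at its 1.0 second default to stay under the Pages API quota.

from dfcx_scrapi.core.flows import Flows

f = Flows(creds_path="creds.json")

# Nested map: {flow_display_name: {"id": flow_id, "pages": {page_name: page_id}}}
flow_page_map = f.get_flow_page_map(
    agent_id="projects/<project>/locations/global/agents/<agent>"  # placeholder
)

for flow_name, flow_info in flow_page_map.items():
    print(flow_name, flow_info["id"], list(flow_info["pages"]))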
diff --git a/src/dfcx_scrapi/core/intents.py b/src/dfcx_scrapi/core/intents.py index aa55e307..74e93701 100644 --- a/src/dfcx_scrapi/core/intents.py +++ b/src/dfcx_scrapi/core/intents.py @@ -23,7 +23,7 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -33,7 +33,7 @@ ) -class Intents(ScrapiBase): +class Intents(scrapi_base.ScrapiBase): """Core Class for CX Intent Resource functions.""" def __init__( @@ -409,6 +409,7 @@ def get_intents_map(self, agent_id: str = None, reverse: bool = False): return intents_dict + @scrapi_base.api_call_counter_decorator def list_intents( self, agent_id: str = None, @@ -445,6 +446,7 @@ def list_intents( return intents + @scrapi_base.api_call_counter_decorator def get_intent( self, intent_id: str = None, @@ -477,6 +479,7 @@ def get_intent( return response + @scrapi_base.api_call_counter_decorator def create_intent( self, agent_id: str, @@ -531,6 +534,7 @@ def create_intent( return response + @scrapi_base.api_call_counter_decorator def update_intent( self, intent_id: str = None, @@ -582,6 +586,7 @@ def update_intent( return response + @scrapi_base.api_call_counter_decorator def delete_intent(self, intent_id: str, obj: types.Intent = None) -> None: """Deletes an intent by Intent ID. diff --git a/src/dfcx_scrapi/core/operations.py b/src/dfcx_scrapi/core/operations.py index 6cbcd070..05f2577f 100644 --- a/src/dfcx_scrapi/core/operations.py +++ b/src/dfcx_scrapi/core/operations.py @@ -16,8 +16,10 @@ import logging from typing import Dict + from google.api_core import operations_v1, grpc_helpers -from dfcx_scrapi.core.scrapi_base import ScrapiBase + +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -26,7 +28,7 @@ datefmt="%Y-%m-%d %H:%M:%S", ) -class Operations(ScrapiBase): +class Operations(scrapi_base.ScrapiBase): """Core class for Operations functions, primarily used to extract LRO information on long running jobs for CX. """ @@ -45,6 +47,7 @@ def __init__( scope=scope ) + @scrapi_base.api_call_counter_decorator def get_lro(self, lro: str): """Used to retrieve the status of LROs for Dialogflow CX. diff --git a/src/dfcx_scrapi/core/pages.py b/src/dfcx_scrapi/core/pages.py index d43be972..8c98ec9f 100644 --- a/src/dfcx_scrapi/core/pages.py +++ b/src/dfcx_scrapi/core/pages.py @@ -112,17 +112,26 @@ def get_pages_map( return pages_dict - def list_pages(self, flow_id: str = None) -> List[gcdc_page.Page]: + @scrapi_base.api_call_counter_decorator + def list_pages( + self, + flow_id: str = None, + language_code: str = "en") -> List[gcdc_page.Page]: """Get a List of all pages for the specified Flow ID. Args: flow_id: the properly formatted Flow ID string + language_code: Specifies the language of the Pages listed. While the + majority of contents of a Page is language agnostic, the contents + in the "Agent Says" and similar parts of a Page are affected by + language code. 
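A sketch of listing pages in a non-default language via the new language_code argument; the flow ID is a placeholder.

from dfcx_scrapi.core.pages import Pages

p = Pages(creds_path="creds.json")

# Fulfillment text ("Agent Says" and similar) comes back in the requested language.
french_pages = p.list_pages(
    flow_id="projects/<project>/locations/global/agents/<agent>/flows/<flow>",  # placeholder
    language_code="fr",
)
print([page.display_name for page in french_pages])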
Returns: A List of CX Page objects for the specific Flow ID """ request = gcdc_page.ListPagesRequest() request.parent = flow_id + request.language_code = language_code client_options = self._set_region(flow_id) client = pages.PagesClient( @@ -137,6 +146,7 @@ def list_pages(self, flow_id: str = None) -> List[gcdc_page.Page]: return cx_pages + @scrapi_base.api_call_counter_decorator def get_page(self, page_id: str = None) -> gcdc_page.Page: """Get a single CX Page object based on the provided Page ID. @@ -158,6 +168,7 @@ def get_page(self, page_id: str = None) -> gcdc_page.Page: return response + @scrapi_base.api_call_counter_decorator def create_page( self, flow_id: str = None, obj: gcdc_page.Page = None, **kwargs ) -> gcdc_page.Page: @@ -191,6 +202,7 @@ def create_page( return response + @scrapi_base.api_call_counter_decorator def update_page( self, page_id: str = None, obj: gcdc_page.Page = None, **kwargs ) -> gcdc_page.Page: @@ -225,13 +237,17 @@ def update_page( return response - def delete_page(self, page_id: str = None) -> str: + @scrapi_base.api_call_counter_decorator + def delete_page(self, page_id: str = None, force: bool = False) -> str: """Deletes the specified Page. Args: page_id: CX Page ID string in the following Format: ``projects//locations//agents// flows//pages/`` + force: (Optional) This field has no effect for pages with no incoming + transitions. If set to True, Dialogflow will remove the page, + as well as any transitions to the page. Returns: String "Page `{page_id}` successfully deleted." @@ -240,6 +256,7 @@ def delete_page(self, page_id: str = None) -> str: client = pages.PagesClient( credentials=self.creds, client_options=client_options ) - client.delete_page(name=page_id) + req = gcdc_page.DeletePageRequest(name=page_id, force=force) + client.delete_page(request=req) return f"Page `{page_id}` successfully deleted." diff --git a/src/dfcx_scrapi/core/project.py b/src/dfcx_scrapi/core/project.py index 21d7dbfb..544b426b 100644 --- a/src/dfcx_scrapi/core/project.py +++ b/src/dfcx_scrapi/core/project.py @@ -18,7 +18,7 @@ import time from typing import Dict -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base from dfcx_scrapi.core.agents import Agents # logging config @@ -29,7 +29,7 @@ ) -class Project(ScrapiBase): +class Project(scrapi_base.ScrapiBase): """Top Level class representing the Project level resources when working on a Dialogflow CX project. This Class will allow you to extract information about your GCP project as a whole in relation to diff --git a/src/dfcx_scrapi/core/scrapi_base.py b/src/dfcx_scrapi/core/scrapi_base.py index 62d6c9b6..a088c1ba 100644 --- a/src/dfcx_scrapi/core/scrapi_base.py +++ b/src/dfcx_scrapi/core/scrapi_base.py @@ -17,6 +17,8 @@ import logging import json import re +import functools +from collections import defaultdict from typing import Dict import numpy as np @@ -72,8 +74,10 @@ def __init__( if agent_id: self.agent_id = agent_id + self.api_calls_dict = defaultdict(int) + @staticmethod - def _set_region(item_id): + def _set_region(resource_id: str): """Different regions have different API endpoints Args: @@ -86,18 +90,27 @@ def _set_region(item_id): if the location is "global" """ try: - location = item_id.split("/")[3] + location = resource_id.split("/")[3] except IndexError as err: - logging.error("IndexError - path too short? %s", item_id) + logging.error("IndexError - path too short? 
%s", resource_id) raise err + project_id = resource_id.split("/")[1] + if location != "global": api_endpoint = f"{location}-dialogflow.googleapis.com:443" - client_options = {"api_endpoint": api_endpoint} + client_options = { + "api_endpoint": api_endpoint, + "quota_project_id": project_id} return client_options else: - return None # explicit None return when not required + api_endpoint = "dialogflow.googleapis.com:443" + client_options = { + "api_endpoint": api_endpoint, + "quota_project_id": project_id} + + return client_options @staticmethod def pbuf_to_dict(pbuf): @@ -268,6 +281,51 @@ def recurse_proto_marshal_to_dict(self, marshal_object): return new_dict + def get_api_calls_details(self) -> Dict[str, int]: + """The number of API calls corresponding to each method. + + Returns: + A dictionary with keys as the method names + and values as the number of calls. + """ + this_class_methods, sub_class_apis_dict = {}, {} + + for attr_name in dir(self): + attr = getattr(self, attr_name) + if callable(attr) and hasattr(attr, "calls_api"): + this_class_methods[attr_name] = 0 + if any( + isinstance(attr, sub_class) + for sub_class in ScrapiBase.__subclasses__() + ): + sub_class_apis_dict.update(attr.get_api_calls_details()) + + if hasattr(self, "api_calls_dict"): + this_class_methods.update(getattr(self, "api_calls_dict")) + + return {**this_class_methods, **sub_class_apis_dict} + + def get_api_calls_count(self) -> int: + """Show the total number of API calls for this resource. + + Returns: + Total calls to the API so far as an int. + """ + return sum(self.get_api_calls_details().values()) + + +def api_call_counter_decorator(func): + """Counts the number of API calls for the function `func`.""" + + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + self.api_calls_dict[func.__name__] += 1 + return func(self, *args, **kwargs) + + wrapper.calls_api = True + + return wrapper + class _AllPagesCustomDict(dict): diff --git a/src/dfcx_scrapi/core/security_settings.py b/src/dfcx_scrapi/core/security_settings.py index d84a8cc2..0617478f 100644 --- a/src/dfcx_scrapi/core/security_settings.py +++ b/src/dfcx_scrapi/core/security_settings.py @@ -54,6 +54,7 @@ def __init__( self.ss_service = services.security_settings_service self.ss_types = types.security_settings + @scrapi_base.api_call_counter_decorator def list_security_settings(self, location_id: str): """List Security Settings for a given Project and Region. @@ -83,6 +84,7 @@ def list_security_settings(self, location_id: str): return security_settings + @scrapi_base.api_call_counter_decorator def get_security_settings(self, security_setting_id: str): """Get specified CCAI Security Setting. @@ -107,6 +109,7 @@ def get_security_settings(self, security_setting_id: str): return response + @scrapi_base.api_call_counter_decorator def create_security_settings( self, location_id: str, @@ -158,6 +161,7 @@ def create_security_settings( return response + @scrapi_base.api_call_counter_decorator def update_security_settings(self, security_setting_id: str, **kwargs): """Update specified CCAI Security Setting. @@ -192,6 +196,7 @@ def update_security_settings(self, security_setting_id: str, **kwargs): return response + @scrapi_base.api_call_counter_decorator def delete_security_settings(self, security_setting_id: str): """Delete the specified CCAI Security Setting. 
diff --git a/src/dfcx_scrapi/core/session_entity_types.py b/src/dfcx_scrapi/core/session_entity_types.py index c016acd1..62cbf215 100644 --- a/src/dfcx_scrapi/core/session_entity_types.py +++ b/src/dfcx_scrapi/core/session_entity_types.py @@ -193,6 +193,7 @@ def build_session_entity_type( return st + @scrapi_base.api_call_counter_decorator def list_session_entity_types( self, session_id: str, environment_id: str = None ) -> List[types.SessionEntityType]: @@ -236,6 +237,7 @@ def list_session_entity_types( return session_entities + @scrapi_base.api_call_counter_decorator def get_session_entity_type( self, session_entity_type_id: str, environment_id: str = None ) -> types.SessionEntityType: @@ -272,6 +274,7 @@ def get_session_entity_type( return response + @scrapi_base.api_call_counter_decorator def create_session_entity_type( self, session_id: str, session_entity_type: types.SessionEntityType ) -> types.SessionEntityType: @@ -299,6 +302,7 @@ def create_session_entity_type( return response + @scrapi_base.api_call_counter_decorator def update_session_entity_type( self, session_entity_type_id: str, @@ -361,6 +365,7 @@ def update_session_entity_type( return response + @scrapi_base.api_call_counter_decorator def delete_session_entity_type( self, session_entity_type_id: str, environment_id: str = None ) -> str: diff --git a/src/dfcx_scrapi/core/sessions.py b/src/dfcx_scrapi/core/sessions.py index 8922ecaf..12bab554 100644 --- a/src/dfcx_scrapi/core/sessions.py +++ b/src/dfcx_scrapi/core/sessions.py @@ -19,8 +19,9 @@ from typing import Dict, List from google.cloud.dialogflowcx_v3beta1 import services from google.cloud.dialogflowcx_v3beta1 import types +from google.protobuf.json_format import MessageToDict -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base # logging config logging.basicConfig( @@ -29,7 +30,7 @@ datefmt="%Y-%m-%d %H:%M:%S", ) -class Sessions(ScrapiBase): +class Sessions(scrapi_base.ScrapiBase): """Core Class for CX Session Resource functions.""" def __init__( @@ -285,3 +286,17 @@ def preset_parameters( response = session_client.detect_intent(request=request) return response + + def get_agent_answer(self, user_query: str) -> str: + """Extract the answer/citation from a Vertex Conversation response.""" + + session_id = self.build_session_id(self.agent_id) + res = MessageToDict(self.detect_intent( # pylint: disable=W0212 + self.agent_id, session_id, user_query)._pb) + + answer_text = res["responseMessages"][0]["text"]["text"][0] + answer_link = res["responseMessages"][1]["payload"][ + "richContent"][0][0]["actionLink"] if len( + res["responseMessages"]) > 1 else "" + + return f"{answer_text} ({answer_link})" diff --git a/src/dfcx_scrapi/core/test_cases.py b/src/dfcx_scrapi/core/test_cases.py index 9258bcf5..b705a278 100644 --- a/src/dfcx_scrapi/core/test_cases.py +++ b/src/dfcx_scrapi/core/test_cases.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +import pandas as pd import logging from typing import Dict, List @@ -22,7 +22,9 @@ from google.cloud.dialogflowcx_v3beta1 import types from google.protobuf import field_mask_pb2 -from dfcx_scrapi.core.scrapi_base import ScrapiBase +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.core import flows +from dfcx_scrapi.core import pages # logging config logging.basicConfig( @@ -32,7 +34,7 @@ ) -class TestCases(ScrapiBase): +class TestCases(scrapi_base.ScrapiBase): """Core Class for CX Test Cases.""" def __init__( @@ -59,12 +61,132 @@ def __init__( self.test_case_id = test_case_id self.client_options = self._set_region(self.test_case_id) - def list_test_cases(self, agent_id: str = None): + def _convert_test_result_to_string(self, test_case: types.TestCase) -> str: + """Converts the Enum result to a string.""" + if test_case.last_test_result.test_result == 0: + return "TEST_RESULT_UNSPECIFIED" + elif test_case.last_test_result.test_result == 1: + return "PASSED" + elif test_case.last_test_result.test_result == 2: + return "FAILED" + else: + return "" + + def _convert_test_result_to_bool(self, test_case: types.TestCase) -> bool: + """Converts the String result to a boolean.""" + test_result = self._convert_test_result_to_string(test_case) + + if test_result == "PASSED": + return True + elif test_result == "FAILED": + return False + else: + return None + + def _get_flow_id_from_test_config( + self, test_case: types.TestCase) -> str: + """Attempt to get the Flow ID from the Test Case Test Config.""" + if "flow" in test_case.test_config: + return test_case.test_config.flow + elif "page" in test_case.test_config: + return "/".join(test_case.test_config.page.split("/")[:8]) + else: + agent_id = "/".join(test_case.name.split("/")[:6]) + return f"{agent_id}/flows/00000000-0000-0000-0000-000000000000" + + def _get_page_id_from_test_config( + self, test_case: types.TestCase, flow_id: str) -> str: + """Attempt to get the Page ID from the Test Case Test Config.""" + if "page" in test_case.test_config: + return test_case.test_config.page + else: + return f"{flow_id}/pages/START_PAGE" + + def _get_page_display_name( + self, flow_id: str, page_id: str, + pages_map: Dict[str, Dict[str, str]]) -> str: + """Get the Page Display Name from the Pages Map based on the Page ID.""" + page_map = pages_map.get(flow_id, None) + page = "START_PAGE" + if page_map: + page = page_map.get(page_id, None) + + return page + + def _process_test_case(self, test_case, flows_map, pages_map): + """Takes a response from list_test_cases and returns a single row + dataframe of the test case result. 
+ + Args: + test_case: The test case response + flows_map: A dictionary mapping flow IDs to flow display names + pages_map: A dictionary with keys as flow IDs and values as + dictionaries mapping page IDs to page display names for that flow + + Returns: A dataframe with columns: + display_name, id, short_id, tags, creation_time, start_flow, + start_page, test_result, passed, test_time + """ + flow_id = self._get_flow_id_from_test_config(test_case) + page_id = self._get_page_id_from_test_config(test_case, flow_id) + page = self._get_page_display_name(flow_id, page_id, pages_map) + test_result = self._convert_test_result_to_bool(test_case) + + return pd.DataFrame( + { + "display_name": [test_case.display_name], + "id": [test_case.name], + "short_id": [test_case.name.split("/")[-1]], + "tags": [",".join(test_case.tags)], + "creation_time": [test_case.creation_time], + "start_flow": [flows_map.get(flow_id, None)], + "start_page": [page], + # "test_result": [test_result], + "passed": [test_result], + "test_time": [test_case.last_test_result.test_time] + } + ) + + def _retest_cases( + self, test_case_df: pd.DataFrame, retest_ids: List[str] + ) -> pd.DataFrame: + print("To retest:", len(retest_ids)) + response = self.batch_run_test_cases(retest_ids, self.agent_id) + for result in response.results: + # Results may not be in the same order as they went in + # Process the name a bit to remove the /results/id part + tc_id_full = "/".join(result.name.split("/")[:-2]) + tc_id = tc_id_full.rsplit("/", maxsplit=1)[-1] + + # Update dataframe where id = tc_id_full + # row = test_case_df.loc[test_case_df['id']==tc_id_full] + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "short_id" + ] = tc_id + # test_case_df.loc[ + # test_case_df["id"] == tc_id_full, "test_result" + # ] = str(result.test_result) + test_case_df.loc[ + test_case_df["id"] == tc_id_full, "test_time" + ] = result.test_time + test_case_df.loc[test_case_df["id"] == tc_id_full,"passed"] = ( + str(result.test_result) == "TestResult.PASSED" + ) + + return test_case_df + + @scrapi_base.api_call_counter_decorator + def list_test_cases( + self, agent_id: str = None, include_conversation_turns: bool = False + ): """List test cases from an agent. Args: - agent_id: The agent to list all pages for. + agent_id: The agent to list all test cases for. `projects//locations//agents/` + include_conversation_turns: Either to include the conversation turns + in the test cases or not. Default is False + which shows only the basic metadata about the test cases. Returns: List of test cases from an agent. @@ -73,8 +195,14 @@ def list_test_cases(self, agent_id: str = None): if not agent_id: agent_id = self.agent_id - request = types.test_case.ListTestCasesRequest() - request.parent = agent_id + if include_conversation_turns: + test_case_view = types.ListTestCasesRequest.TestCaseView.FULL + else: + test_case_view = types.ListTestCasesRequest.TestCaseView.BASIC + + request = types.test_case.ListTestCasesRequest( + parent=agent_id, view=test_case_view + ) client_options = self._set_region(agent_id) @@ -91,6 +219,7 @@ def list_test_cases(self, agent_id: str = None): return test_cases + @scrapi_base.api_call_counter_decorator def export_test_cases( self, gcs_uri: str, @@ -137,6 +266,7 @@ def export_test_cases( return response + @scrapi_base.api_call_counter_decorator def create_test_case(self, test_case: types.TestCase, agent_id: str = None): """Create a new Test Case in the specified CX Agent. 
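A sketch of the new include_conversation_turns switch: the default BASIC view returns only test case metadata, while FULL also pulls the recorded conversation turns. The agent ID is a placeholder and the constructor arguments are assumed to follow the library's usual pattern.

from dfcx_scrapi.core.test_cases import TestCases

agent = "projects/<project>/locations/global/agents/<agent>"  # placeholder
tc = TestCases(creds_path="creds.json", agent_id=agent)       # constructor args assumed

# Metadata only (BASIC view).
summaries = tc.list_test_cases(agent_id=agent)

# Full test cases, including conversation turns (FULL view).
full_cases = tc.list_test_cases(agent_id=agent, include_conversation_turns=True)
print(len(summaries), len(full_cases))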
@@ -162,6 +292,7 @@ def create_test_case(self, test_case: types.TestCase, agent_id: str = None): response = client.create_test_case(request) return response + @scrapi_base.api_call_counter_decorator def get_test_case(self, test_case_id: str): """Get test case object from CX Agent. @@ -184,6 +315,7 @@ def get_test_case(self, test_case_id: str): response = client.get_test_case(request) return response + @scrapi_base.api_call_counter_decorator def import_test_cases(self, gcs_uri: str, agent_id: str = None): """Import test cases from cloud storage. @@ -212,6 +344,7 @@ def import_test_cases(self, gcs_uri: str, agent_id: str = None): result = response.result() return result + @scrapi_base.api_call_counter_decorator def batch_delete_test_cases( self, test_case_ids: List[str], @@ -242,6 +375,7 @@ def batch_delete_test_cases( ) client.batch_delete_test_cases(request) + @scrapi_base.api_call_counter_decorator def list_test_case_results(self, test_case_id: str): """List the results from a specific Test Case. @@ -272,6 +406,7 @@ def list_test_case_results(self, test_case_id: str): return test_case_results + @scrapi_base.api_call_counter_decorator def batch_run_test_cases( self, test_cases: List[str], @@ -309,6 +444,7 @@ def batch_run_test_cases( results = response.result() return results + @scrapi_base.api_call_counter_decorator def update_test_case( self, test_case_id: str = None, @@ -348,6 +484,7 @@ def update_test_case( response = client.update_test_case(request) return response + @scrapi_base.api_call_counter_decorator def run_test_case(self, test_case_id: str, environment: str = None): """Run test case and get result for a specified test case. @@ -375,6 +512,7 @@ def run_test_case(self, test_case_id: str, environment: str = None): results = response.result() return results + @scrapi_base.api_call_counter_decorator def get_test_case_result(self, test_case_result_id: str): """Get test case result for a specified run on a specified test case. @@ -396,6 +534,7 @@ def get_test_case_result(self, test_case_result_id: str): response = client.get_test_case_result(request) return response + @scrapi_base.api_call_counter_decorator def calculate_coverage(self, coverage_type: int, agent_id: str = None): """Calculate coverage of different resources in the test case set. @@ -430,3 +569,50 @@ def calculate_coverage(self, coverage_type: int, agent_id: str = None): ) response = client.calculate_coverage(request) return response + + def get_test_case_results_df(self, agent_id=None, retest_all=False): + """Convert Test Cases to Dataframe. + + Gets the test case results for this agent, and generates a dataframe + with their details. Any tests without a result will be run in a batch. + + Args: + agent_id: The agent to create the test case for. 
Format: + `projects//locations//agents/` + retest_all: if true, all test cases are re-run, + regardless of whether or not they had a result + + Returns: + DataFrame of test case results for this agent, with columns: + display_name, id, short_id, tags, creation_time, start_flow, + start_page, passed, test_time + """ + if agent_id: + self.agent_id = agent_id + + dfcx_flows = flows.Flows(creds=self.creds, agent_id=self.agent_id) + dfcx_pages = pages.Pages(creds=self.creds) + flows_map = dfcx_flows.get_flows_map(agent_id=self.agent_id) + pages_map = {} + for flow_id in flows_map.keys(): + pages_map[flow_id] = dfcx_pages.get_pages_map(flow_id=flow_id) + + test_case_results = self.list_test_cases(self.agent_id) + retest_ids = [] + test_case_rows = [] + + for test_case in test_case_results: + row = self._process_test_case(test_case, flows_map, pages_map) + test_case_rows.append(row) + test_result = self._convert_test_result_to_string(test_case) + if retest_all or test_result == "TEST_RESULT_UNSPECIFIED": + retest_ids.append(test_case.name) + + # Create dataframe + test_case_df = pd.concat(test_case_rows) + + # Retest any that haven't been run yet + if len(retest_ids) > 0: + test_case_df = self._retest_cases(test_case_df,retest_ids) + + return test_case_df diff --git a/src/dfcx_scrapi/core/transition_route_groups.py b/src/dfcx_scrapi/core/transition_route_groups.py index 0d9990c9..8c2c8984 100644 --- a/src/dfcx_scrapi/core/transition_route_groups.py +++ b/src/dfcx_scrapi/core/transition_route_groups.py @@ -134,6 +134,7 @@ def get_route_groups_map(self, flow_id: str = None, reverse=False): return pages_dict + @scrapi_base.api_call_counter_decorator def list_transition_route_groups(self, flow_id: str = None): """Exports List of all Route Groups in the specified CX Flow ID. @@ -164,6 +165,7 @@ def list_transition_route_groups(self, flow_id: str = None): return cx_route_groups + @scrapi_base.api_call_counter_decorator def get_transition_route_group(self, route_group_id): """Get a single Transition Route Group object. @@ -183,6 +185,7 @@ def get_transition_route_group(self, route_group_id): return response + @scrapi_base.api_call_counter_decorator def create_transition_route_group( self, flow_id: str = None, @@ -222,6 +225,7 @@ def create_transition_route_group( return response + @scrapi_base.api_call_counter_decorator def update_transition_route_group( self, route_group_id: str = None, @@ -320,9 +324,11 @@ def route_groups_to_dataframe( temp_dict.update({"route_group_name": route_group.display_name}) if route.target_page: - temp_dict.update( - {"target_page": all_pages_map[route.target_page]} - ) + t_p = all_pages_map.get(route.target_page) + if not t_p: + t_p = str(route.target_page).rsplit("/", maxsplit=1)[-1] + + temp_dict.update({"target_page": t_p}) if route.intent: temp_dict.update({"intent": intents_map[route.intent]}) diff --git a/src/dfcx_scrapi/core/versions.py b/src/dfcx_scrapi/core/versions.py index 1d1d2658..cc94fd19 100644 --- a/src/dfcx_scrapi/core/versions.py +++ b/src/dfcx_scrapi/core/versions.py @@ -48,6 +48,7 @@ def __init__( if flow_id: self.flow_id = flow_id + @scrapi_base.api_call_counter_decorator def list_versions(self, flow_id:str): """List all Versions for a given Flow. 
@@ -79,6 +80,7 @@ def list_versions(self, flow_id:str): return versions + @scrapi_base.api_call_counter_decorator def get_version( self, version_id:str=None, @@ -140,6 +142,7 @@ def get_version_by_display_name(self, display_name:str, flow_id:str): return None + @scrapi_base.api_call_counter_decorator def load_version( self, version:types.version.Version, @@ -176,6 +179,7 @@ def load_version( response = client.load_version(request) return response + @scrapi_base.api_call_counter_decorator def create_version( self, flow_id:str, @@ -212,6 +216,7 @@ def create_version( return response + @scrapi_base.api_call_counter_decorator def delete_version(self, version_id:str): """Delete a specified Version. @@ -229,6 +234,7 @@ def delete_version(self, version_id:str): return client.delete_version(request) + @scrapi_base.api_call_counter_decorator def compare_versions( self, base_version_id:str, diff --git a/src/dfcx_scrapi/core/webhooks.py b/src/dfcx_scrapi/core/webhooks.py index 9731ecab..3a10b4c2 100644 --- a/src/dfcx_scrapi/core/webhooks.py +++ b/src/dfcx_scrapi/core/webhooks.py @@ -88,6 +88,7 @@ def get_webhooks_map( return webhooks_dict + @scrapi_base.api_call_counter_decorator def list_webhooks(self, agent_id: str = None): """List all Webhooks in the specified CX Agent. @@ -116,6 +117,7 @@ def list_webhooks(self, agent_id: str = None): return cx_webhooks + @scrapi_base.api_call_counter_decorator def create_webhook( self, agent_id: str, @@ -149,6 +151,7 @@ def create_webhook( return response + @scrapi_base.api_call_counter_decorator def get_webhook(self, webhook_id:str): """Retrieves the specified webhook. @@ -202,6 +205,7 @@ def get_webhook_by_display_name( return webhook_obj + @scrapi_base.api_call_counter_decorator def update_webhook( self, webhook_id:str, diff --git a/src/dfcx_scrapi/tools/agent_checker_util.py b/src/dfcx_scrapi/tools/agent_checker_util.py new file mode 100644 index 00000000..ff6e331a --- /dev/null +++ b/src/dfcx_scrapi/tools/agent_checker_util.py @@ -0,0 +1,203 @@ +"""A set of Utility methods to check resources stats on DFCX Agents.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import time +from typing import Dict, List +import pandas as pd + +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.agent_extract import agents + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + +class AgentCheckerUtil(scrapi_base.ScrapiBase): + """Utility class for checking DFCX Agents.""" + + def __init__( + self, + agent_id: str, + gcs_bucket_uri: str, + creds_path: str = None, + creds_dict: Dict = None, + creds=None, + scope=False, + ): + super().__init__( + creds_path=creds_path, + creds_dict=creds_dict, + creds=creds, + scope=scope, + ) + self.agent_id = agent_id + self.special_pages = [ + "End Session", + "End Flow", + "Start Page", + "Current Page", + "Previous Page", + ] + + startup_time = time.time() + self.extract = agents.Agents(agent_id) + processing_time = time.time() + logging.debug(f"STARTUP: {processing_time - startup_time}") + + self.data = self.extract.process_agent(agent_id, gcs_bucket_uri) + logging.debug(f"TOTAL PROCESSING: {time.time() - processing_time}") + + self.active_intents_df = self.active_intents_to_dataframe() + + def _filter_special_pages(self, page: str, filter_special_pages: bool): + """Recursion helper to check for special page match.""" + if filter_special_pages and page in self.special_pages: + return True + + return False + + def _recurse_edges(self, edges: Dict[str, List[str]], page: str, + visited: set, depth: int, max_depth: int, + filter_special_pages: bool): + """Recursion method used to traverse the agent graph for page data. + + Args: + edges: The set of graph edges collected from the agent. + page: The current Page Display Name + visited: A set of visited Page nodes + depth: The current recursion depth + max_depth: The max recursion depth + filter_special_pages: Will discard all self.special_pages from output + if set to False. + """ + if depth == max_depth: + return visited + + if page in edges: + for inner_page in edges[page]: + if self._filter_special_pages(inner_page, filter_special_pages): + return visited + + if inner_page not in visited: + visited.add(inner_page) + visited = self._recurse_edges( + edges, inner_page, visited, depth+1, max_depth, + filter_special_pages) + + return visited + + def _mark_unreachable_pages(self, df: pd.DataFrame) -> pd.DataFrame: + """Mark dataframe rows True if the page is unreachable in graph.""" + for idx, row in df.iterrows(): + for page in self.data.unreachable_pages[row["flow"]]: + if row["page"] == page: + df.loc[idx, "unreachable"] = True + + return df + + def get_reachable_pages( + self, + flow_display_name: str, + page_display_name: str = "Start Page", + max_depth: int = 1, + filter_special_pages = True) -> List[str]: + """Get all pages in the graph that are reachable via transition routes, + starting from a given Flow and Page. + + Args: + flow_display_name: The display name of the flow + page_display_name: The display name of the page. Defaults to + "Start Page" + max_depth: The max recursion depth to search the graph from the + provided starting point. For example, a max_depth of 2 would produce + all reachable Pages that are 2 transition routes away from the + starting Flow/Page. Defaults to 1. + filter_special_pages: Will filter out all self.special_pages. Defaults + to True. 
+ """ + if page_display_name in ["START", "START_PAGE", "Start", "Start Page"]: + page_display_name = "Start Page" + page_display_name = f"{flow_display_name}: {page_display_name}" + + visited = self._recurse_edges( + self.data.graph.edges, page_display_name, set(), 0, max_depth, + filter_special_pages) + + return list(visited) + + def active_intents_to_dataframe(self) -> pd.DataFrame: + """Gets all intents referenced in the agent, across all flows and pages, + and produces a dataframe listing which flows/pages reference each + intent. + + Returns: + A dataframe with columns + intent - the intent display name + flow - the Flow Display Name where the intent resides + page - the Page Display Name where the intent resides + unreachable - Denotes whether the Flow/Page/Intent combination is + unreachable in the graph. + """ + df = pd.DataFrame({ + "intent": pd.Series(dtype="str"), + "flow": pd.Series(dtype="str"), + "page": pd.Series(dtype="str"), + "unreachable": pd.Series(dtype="bool") + }) + + # Loop over active_intents, create temp dataframe, then concat with the + # main dataframe to build out the complete Flow/Page/Intent dataset. + for flow in self.data.active_intents: + for pair in self.data.active_intents[flow]: + intent = pair[0] + page = pair[1] + temp = pd.DataFrame({ + "intent": [intent], + "flow": [flow], + "page": [page], + "unreachable": [False]}) + df = pd.concat([df, temp]) + + df = df.reset_index(drop=True) + + # Finally, determine what rows are unreachable. + self.active_intents_df = self._mark_unreachable_pages(df) + + return self.active_intents_df + + def get_unused_intents(self) -> List: + """Get all unused Intents across the agent.""" + if self.active_intents_df.empty: + self.active_intents_df = self.active_intents_to_dataframe() + active_intents_set = set(self.active_intents_df.intent.to_list()) + all_intents_set = set(self.data.intents_map.keys()) + + return list(all_intents_set.difference(active_intents_set)) + + def get_unreachable_intents(self) -> pd.DataFrame: + """Get all unreachable Intents across the agent. + + An Intent is unreachable if it resides on a page that is also + unreachable. 
+ """ + if self.active_intents_df.empty: + self.active_intents_df = self.active_intents_to_dataframe() + + return self.active_intents_df[self.active_intents_df["unreachable"]] diff --git a/src/dfcx_scrapi/tools/copy_util.py b/src/dfcx_scrapi/tools/copy_util.py index 44704e9d..65498cd8 100644 --- a/src/dfcx_scrapi/tools/copy_util.py +++ b/src/dfcx_scrapi/tools/copy_util.py @@ -134,36 +134,22 @@ def _convert_entry_webhooks(page_object, webhooks_map): def __convert_tr_target_page( trans_route, pages_map, convert_type=None, flows_map=None, flow=None ): + special_pages = [ + "END_FLOW", "END_SESSION", + "CURRENT_PAGE", "PREVIOUS_PAGE", "START_PAGE" + ] if convert_type == "source": - if trans_route.target_page.split("/")[-1] == "END_FLOW": - trans_route.target_page = "END_FLOW" - elif trans_route.target_page.split("/")[-1] == "END_SESSION": - trans_route.target_page = "END_SESSION" - elif trans_route.target_page.split("/")[-1] == "CURRENT_PAGE": - trans_route.target_page = "CURRENT_PAGE" - elif trans_route.target_page.split("/")[-1] == "PREVIOUS_PAGE": - trans_route.target_page = "PREVIOUS_PAGE" - elif trans_route.target_page.split("/")[-1] == "START_PAGE": - trans_route.target_page = "START_PAGE" + last_part = trans_route.target_page.split("/")[-1] + if last_part in special_pages: + trans_route.target_page = last_part else: trans_route.target_page = pages_map[trans_route.target_page] elif convert_type == "destination": - if trans_route.target_page == "END_FLOW": - trans_route.target_page = flows_map[flow] + "/pages/END_FLOW" - elif trans_route.target_page == "END_SESSION": - trans_route.target_page = flows_map[flow] + "/pages/END_SESSION" - elif trans_route.target_page == "CURRENT_PAGE": - trans_route.target_page = ( - flows_map[flow] + "/pages/CURRENT_PAGE" - ) - elif trans_route.target_page == "PREVIOUS_PAGE": - trans_route.target_page = ( - flows_map[flow] + "/pages/PREVIOUS_PAGE" - ) - elif trans_route.target_page == "START_PAGE": - trans_route.target_page = flows_map[flow] + "/pages/START_PAGE" + if trans_route.target_page in special_pages: + new_page = f"{flows_map[flow]}/pages/{trans_route.target_page}" + trans_route.target_page = new_page else: trans_route.target_page = pages_map[trans_route.target_page] @@ -628,7 +614,9 @@ def copy_entity_type_to_agent( # push to destination agent try: - self.entities.create_entity_type(destination_agent, entity_object) + self.entities.create_entity_type( + agent_id=destination_agent, + obj=entity_object) logging.info( "Entity Type %s created successfully", entity_object.display_name, diff --git a/src/dfcx_scrapi/tools/dataframe_functions.py b/src/dfcx_scrapi/tools/dataframe_functions.py index 9df0cd24..e10e5c21 100644 --- a/src/dfcx_scrapi/tools/dataframe_functions.py +++ b/src/dfcx_scrapi/tools/dataframe_functions.py @@ -80,7 +80,10 @@ def __init__( if scope: scopes += scope - if creds_path: + if creds: + self.sheets_client = gspread.authorize(creds) + + elif creds_path: creds = ServiceAccountCredentials.from_json_keyfile_name( filename=creds_path, scopes=scopes ) diff --git a/src/dfcx_scrapi/tools/nlu_evals.py b/src/dfcx_scrapi/tools/nlu_evals.py new file mode 100644 index 00000000..d8c84977 --- /dev/null +++ b/src/dfcx_scrapi/tools/nlu_evals.py @@ -0,0 +1,308 @@ +"""A set of Utility methods to check resources stats on DFCX Agents.""" + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict +from dataclasses import dataclass + +import logging +import datetime +import pandas as pd +import gspread + +from dfcx_scrapi.core import scrapi_base +from dfcx_scrapi.core import agents +from dfcx_scrapi.core import flows +from dfcx_scrapi.core import pages +from dfcx_scrapi.core import intents +from dfcx_scrapi.core import conversation +from dfcx_scrapi.tools import dataframe_functions + +pd.options.display.max_colwidth = 200 + +GLOBAL_SCOPE = [ + "https://spreadsheets.google.com/feeds", + "https://www.googleapis.com/auth/drive", +] +INPUT_SCHEMA_COLUMNS = [ + "flow_display_name", + "page_display_name", + "utterance", + "expected_intent", + "expected_parameters", + "description", + ] + +OUTPUT_SCHEMA_COLUMNS = [ + "flow_display_name", + "page_display_name", + "utterance", + "expected_intent", + "expected_parameters", + "target_page", + "match_type", + "confidence", + "parameters_set", + "detected_intent", + "agent_display_name", + "description", + "input_source" + ] + +SUMMARY_SCHEMA_COLUMNS = [ + "test_run_timestamp", + "total_tests", + "pass_count", + "pass_rate", + "no_match_count", + "no_match_rate", + "test_agent", + "data_source" + ] + +# logging config +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + + +@dataclass +class Stats: + """Dataclass for the summary stats.""" + no_match_count: int = 0 + no_match_rate: float = 0.0 + pass_count: int = 0 + pass_rate: float = 0.0 + test_agent: str = None + data_source: str = None + + +class NluEvals(scrapi_base.ScrapiBase): + """NLU Evaluation Class for Dialogflow CX Testing.""" + def __init__( + self, + agent_id: str, + creds_path: str = None, + creds_dict: Dict[str, str] = None, + creds=None, + ): + + super().__init__( + creds_path=creds_path, + creds_dict=creds_dict, + creds=creds, + scope=GLOBAL_SCOPE + ) + + self.agent_id = agent_id + self._sheets_client = self._build_sheets_client() + + self._a = agents.Agents(creds=self.creds) + self._i = intents.Intents(creds=self.creds) + self._f = flows.Flows(creds=self.creds) + self._p = pages.Pages(creds=self.creds) + self._dc = conversation.DialogflowConversation( + creds_path=creds_path, agent_id=agent_id + ) + self._dffx = dataframe_functions.DataframeFunctions(creds=self.creds) + + def _build_sheets_client(self): + client = gspread.authorize(self.creds) + + return client + + def _calculate_stats(self, df: pd.DataFrame): + """Calculate all the stats needed for the summary report.""" + stats = Stats() + stats.no_match_count = ( + df[df.detected_intent == "NO_MATCH"] + .groupby("detected_intent") + .size() + .sum() + ) + stats.no_match_rate = stats.no_match_count / df.shape[0] + stats.pass_count = ( + df[df.detected_intent == df.expected_intent] + .groupby("detected_intent") + .size() + .sum() + ) + stats.pass_rate = stats.pass_count / df.shape[0] + stats.test_agent = df.agent_display_name.unique()[0] + stats.data_source = df.input_source.unique()[0] + + return stats + + def _write_report_summary_to_sheets( + self, df: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + 
"""Writes the output report summary to Google Sheets.""" + + gsheet = self._sheets_client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) + + df["test_run_timestamp"] = df.test_run_timestamp.astype("str") + + sheet.append_row( + df.values.flatten().tolist(), value_input_option="USER_ENTERED" + ) + + def _append_test_results_to_sheets( + self, results: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + """Adds results to an existing Google Sheet collection.""" + + gsheet = self._sheets_client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) + + # Fixes an error that sometimes happens when trying to write parameters + # to the sheet because they are formatted as objects + result_list = results.values.tolist() + result_list = [list(map(str, row)) for row in result_list] + + sheet.append_rows(result_list, value_input_option="USER_ENTERED") + + def _write_test_results_to_sheets( + self, results: pd.DataFrame, sheet_name: str, sheet_tab: str + ): + """Writes the output result details to Google Sheets.""" + + gsheet = self._sheets_client.open(sheet_name) + sheet = gsheet.worksheet(sheet_tab) + + sheet.clear() + + self._dffx.dataframe_to_sheets(sheet_name, sheet_tab, results) + + def _clean_dataframe(self, df): + """Various Dataframe cleaning functions.""" + df.columns = df.columns.str.lower() + df = df.replace("Start Page", "START_PAGE") + df.rename( + columns={ + "source": "description", + }, + inplace=True, + ) + + # Validate input schema + try: + df = df[INPUT_SCHEMA_COLUMNS] + except KeyError as err: + raise UserWarning("Ensure your input data contains the following "\ + f"columns: {INPUT_SCHEMA_COLUMNS}") from err + + df["agent_display_name"] = self._a.get_agent(self.agent_id).display_name + + return df + + def process_input_csv(self, input_file_path: str): + """Process the input data in CSV format.""" + df = pd.read_csv(input_file_path) + df = df.fillna("") + df = self._clean_dataframe(df) + df["input_source"] = input_file_path + + return df + + def process_input_google_sheet(self, gsheet_name: str, gsheet_tab: str): + """Process the input data in Google Sheets format.""" + df = self._dffx.sheets_to_dataframe(gsheet_name, gsheet_tab) + df = self._clean_dataframe(df) + df["input_source"] = gsheet_tab + + return df + + def run_evals(self, df: pd.DataFrame, chunk_size: int = 300, + rate_limit: float = 10.0, + eval_run_display_name: str = "Evals"): + """Run the full Eval dataset.""" + logsx = "-" * 10 + + logging.info(f"{logsx} STARTING {eval_run_display_name} {logsx}") + results = self._dc.run_intent_detection( + test_set=df, chunk_size=chunk_size, rate_limit=rate_limit + ) + + # Reorder Columns + results = results.reindex(columns=OUTPUT_SCHEMA_COLUMNS) + + # When a NO_MATCH occurs, the detected_intent field will be blank + # this replaces with NO_MATCH string, which will allow for easier stats + # calculation downstream + results.detected_intent.replace({"": "NO_MATCH"}, inplace=True) + + logging.info(f"{logsx} {eval_run_display_name} COMPLETE {logsx}") + + return results + + def generate_report(self, df: pd.DataFrame, + report_timestamp: datetime.datetime + ): + """Generates a summary stats report for most recent NLU Eval tests.""" + # Calc fields + stats = self._calculate_stats(df) + + # Generate Dataframe format + df_report = pd.DataFrame( + columns=SUMMARY_SCHEMA_COLUMNS, + data=[ + [ + report_timestamp, + df.shape[0], + stats.pass_count, + stats.pass_rate, + stats.no_match_count, + stats.no_match_rate, + stats.test_agent, + stats.data_source, + ] + ], + ) + + return 
df_report + + def write_summary_to_file(self, df: pd.DataFrame, output_file: str): + """Write summary output to a local CSV file.""" + report_timestamp = datetime.datetime.now() + df_report = self.generate_report(df, report_timestamp) + df_report.to_csv(output_file, index=False) + + def write_results_to_file(self, df: pd.DataFrame, output_file: str): + df.to_csv(output_file, index=False) + + def write_results_to_sheets(self, df: pd.DataFrame, google_sheet_name: str, + full_output_tab: str, + summary_tab: str, + append=False): + """Write summary and detailed output to Google Sheets.""" + report_timestamp = datetime.datetime.now() + df_report = self.generate_report(df, report_timestamp) + + self._write_report_summary_to_sheets( + df_report, google_sheet_name, summary_tab + ) + + if append: + self._append_test_results_to_sheets( + df, google_sheet_name, full_output_tab + ) + + else: + self._write_test_results_to_sheets( + df, google_sheet_name, full_output_tab + ) diff --git a/src/dfcx_scrapi/tools/webhook_util.py b/src/dfcx_scrapi/tools/webhook_util.py index 6bddb296..197a93c8 100644 --- a/src/dfcx_scrapi/tools/webhook_util.py +++ b/src/dfcx_scrapi/tools/webhook_util.py @@ -74,7 +74,9 @@ def build_session_info(parameters): return session_info @staticmethod - def build_response(response_text=None, page_info=None, session_info=None): + def build_response( + response_text=None, page_info=None, session_info=None, append=False + ): """Builds a Response object for Dialogflow CX. Provides the JSON object structure expected by DFCX for the Response @@ -85,12 +87,16 @@ def build_response(response_text=None, page_info=None, session_info=None): response_text: The text response to be displayed to the user. Can also be empty string if no response to the user is required. page_info: (Optional) The JSON object returned by build_page_info() - session_info: (Optiona) The JSON object returned by + session_info: (Optional) The JSON object returned by build_session_info() + append: (Optional) Whether messages will append or replace to + the list of messages waiting to be sent to the user. If append + set to False it will replace the messages. """ + action = 'APPEND' if append else 'REPLACE' if response_text: response_object = { - 'mergeBehavior': 'REPLACE', + 'mergeBehavior': action, 'messages': [ { 'text': {