From 669f080229443e55321cba88f68b3f7684809b20 Mon Sep 17 00:00:00 2001 From: Heyi Tang Date: Fri, 2 Feb 2024 18:44:04 +0800 Subject: [PATCH] [Executor][Internal]Use folder name as flow name in the trace (#1929) # Description This pull request makes the flow's folder name the default flow name and uses the flow name as the span name in the trace, together with the supporting code refactoring in the `promptflow` package. The changes include moving the `_normalize_identifier_name` and `_sanitize_python_variable_name` functions to a different file, using the `_sanitize_python_variable_name` function to generate a default name for a flow, and changing the span name in the telemetry tracer. Here are the most important changes in detail: Code refactoring: * [`src/promptflow/promptflow/_sdk/_utils.py`](diffhunk://#diff-47208ac35b30920275fcd5e55d662647ef360129359bdc77fddd2a2157b6f47eL372-R374): The `_normalize_identifier_name` function was removed from this file, and `_sanitize_python_variable_name` was reduced to a thin wrapper that imports the implementation from `promptflow._utils.utils` and delegates to it. * [`src/promptflow/promptflow/_utils/utils.py`](diffhunk://#diff-d12fdd7b90cc1748f1d3e1237b4f357ba7f66740445d117beeb68ed174d1e86eR299-R311): The `_normalize_identifier_name` and `_sanitize_python_variable_name` functions were added to this file. They are used to normalize and sanitize Python variable names. Flow naming: * [`src/promptflow/promptflow/contracts/flow.py`](diffhunk://#diff-b353941bce91518c7f494112d2dd5088b73681eb66e6ff19294edb9c3ec05d0fL20-R20): The `_sanitize_python_variable_name` function is now imported from `promptflow._utils.utils` and used to generate a default name for a flow, derived from the name of the flow's working directory, when one is not provided in the YAML file. 
[[1]](diffhunk://#diff-b353941bce91518c7f494112d2dd5088b73681eb66e6ff19294edb9c3ec05d0fL20-R20) [[2]](diffhunk://#diff-b353941bce91518c7f494112d2dd5088b73681eb66e6ff19294edb9c3ec05d0fR658) Telemetry: * [`src/promptflow/promptflow/executor/flow_executor.py`](diffhunk://#diff-faa6c81d614b7e41b18a42a93139d961d92afa9aa9dd0b72cb6b7176d7541e69L780-R780): The span name in the telemetry tracer was changed from the hardcoded string `promptflow.flow` to the name of the flow being executed (`self._flow.name`). # All Promptflow Contribution checklist: - [ ] **The pull request does not introduce [breaking changes].** - [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from the promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [ ] Title of the pull request is clear and informative. - [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [ ] Pull request includes test coverage for the included changes. 
--------- Co-authored-by: Heyi --- src/promptflow/promptflow/_sdk/_utils.py | 12 ++---------- src/promptflow/promptflow/_utils/utils.py | 13 +++++++++++++ src/promptflow/promptflow/contracts/flow.py | 5 +++-- src/promptflow/promptflow/executor/flow_executor.py | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/promptflow/promptflow/_sdk/_utils.py b/src/promptflow/promptflow/_sdk/_utils.py index e2f8f7bf558..631211550ae 100644 --- a/src/promptflow/promptflow/_sdk/_utils.py +++ b/src/promptflow/promptflow/_sdk/_utils.py @@ -369,17 +369,9 @@ def safe_parse_object_list(obj_list, parser, message_generator): return results -def _normalize_identifier_name(name): - normalized_name = name.lower() - normalized_name = re.sub(r"[\W_]", " ", normalized_name) # No non-word characters - normalized_name = re.sub(" +", " ", normalized_name).strip() # No double spaces, leading or trailing spaces - if re.match(r"\d", normalized_name): - normalized_name = "n" + normalized_name # No leading digits - return normalized_name - - def _sanitize_python_variable_name(name: str): - return _normalize_identifier_name(name).replace(" ", "_") + from promptflow._utils.utils import _sanitize_python_variable_name + return _sanitize_python_variable_name(name) def _get_additional_includes(yaml_path): diff --git a/src/promptflow/promptflow/_utils/utils.py b/src/promptflow/promptflow/_utils/utils.py index bbaf46399f7..bb789f30034 100644 --- a/src/promptflow/promptflow/_utils/utils.py +++ b/src/promptflow/promptflow/_utils/utils.py @@ -296,3 +296,16 @@ def prompt_y_n(msg, default=None): def prompt_input(msg): return input("\n===> " + msg) + + +def _normalize_identifier_name(name): + normalized_name = name.lower() + normalized_name = re.sub(r"[\W_]", " ", normalized_name) # No non-word characters + normalized_name = re.sub(" +", " ", normalized_name).strip() # No double spaces, leading or trailing spaces + if re.match(r"\d", normalized_name): + normalized_name = "n" + 
normalized_name # No leading digits + return normalized_name + + +def _sanitize_python_variable_name(name: str): + return _normalize_identifier_name(name).replace(" ", "_") diff --git a/src/promptflow/promptflow/contracts/flow.py b/src/promptflow/promptflow/contracts/flow.py index 1ef62efbb8d..6859902068f 100644 --- a/src/promptflow/promptflow/contracts/flow.py +++ b/src/promptflow/promptflow/contracts/flow.py @@ -17,7 +17,7 @@ from .._constants import LANGUAGE_KEY, FlowLanguage from .._sdk._constants import DEFAULT_ENCODING from .._utils.dataclass_serializer import serialize -from .._utils.utils import try_import +from .._utils.utils import try_import, _sanitize_python_variable_name from ._errors import FailedToImportModule from .tool import ConnectionType, Tool, ToolType, ValueType @@ -601,7 +601,7 @@ def deserialize(data: dict) -> "Flow": outputs = data.get("outputs") or {} return Flow( # TODO: Remove this fallback. - data.get("id", data.get("name", "default_flow_id")), + data.get("id", "default_flow_id"), data.get("name", "default_flow"), nodes, {name: FlowInputDefinition.deserialize(i) for name, i in inputs.items()}, @@ -655,6 +655,7 @@ def from_yaml(cls, flow_file: Path, working_dir=None) -> "Flow": working_dir = cls._parse_working_dir(flow_file, working_dir) with open(working_dir / flow_file, "r", encoding=DEFAULT_ENCODING) as fin: flow_dag = load_yaml(fin) + flow_dag["name"] = flow_dag.get("name", _sanitize_python_variable_name(working_dir.stem)) return Flow._from_dict(flow_dag=flow_dag, working_dir=working_dir) @classmethod diff --git a/src/promptflow/promptflow/executor/flow_executor.py b/src/promptflow/promptflow/executor/flow_executor.py index c37b5472200..49c67bbb9ae 100644 --- a/src/promptflow/promptflow/executor/flow_executor.py +++ b/src/promptflow/promptflow/executor/flow_executor.py @@ -782,7 +782,7 @@ def _exec_with_trace( Returns: LineResult: Line run result """ - with open_telemetry_tracer.start_as_current_span("promptflow.flow") as span: + 
with open_telemetry_tracer.start_as_current_span(self._flow.name) as span: # initialize span span.set_attributes( {