diff --git a/.github/workflows/package-ci.yml b/.github/workflows/package-ci.yml new file mode 100644 index 0000000..e304d79 --- /dev/null +++ b/.github/workflows/package-ci.yml @@ -0,0 +1,52 @@ +name: AquDeM Code Quality Check + +on: + pull_request: + branches: [ main ] + paths: + - 'package/**' + push: + branches: [ main ] + paths: + - 'package/**' + +jobs: + test: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.12' + - name: Install dependencies + run: | + cd package + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements_dev.txt + - name: Run unit tests with pytest + run: | + cd package + export PYTHONPATH=. + pytest + + - name: Make sure coverage is high enough + run: | + cd package + export PYTHONPATH=. + coverage run --source=aqudem --branch -m pytest + coverage report --fail-under=90 + + - name: Lint with pylint + run: | + cd package + pylint aqudem + pylint --disable=protected-access,missing-function-docstring tests + + - name: Type checking with mypy + run: | + cd package + mypy aqudem tests --strict diff --git a/.gitignore b/.gitignore index 68bc17f..2dc53ca 100644 --- a/.gitignore +++ b/.gitignore @@ -157,4 +157,4 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ diff --git a/README.md b/README.md index fe5a82c..1445139 100644 --- a/README.md +++ b/README.md @@ -1 +1,8 @@ -# aqudem \ No newline at end of file +# AquDeM +**A**ctivity and Se**qu**ence **De**tection Performance **M**easures: A package to evaluate activity detection results, including the sequence of events given multiple activity types. 
+ +The tool has a stand-alone package to use programmatically in your Python code, and a web interface to evaluate the results of your activity detection algorithm interactively. + +For more info on the package, see `package/`. + +For more info on the web interface, see `frontend/`. \ No newline at end of file diff --git a/package/.editorconfig b/package/.editorconfig new file mode 100644 index 0000000..d4a2c44 --- /dev/null +++ b/package/.editorconfig @@ -0,0 +1,21 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/package/.github/ISSUE_TEMPLATE.md b/package/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..4dc03ca --- /dev/null +++ b/package/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* AquDeM version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. +``` diff --git a/package/.gitignore b/package/.gitignore new file mode 100644 index 0000000..4c915d1 --- /dev/null +++ b/package/.gitignore @@ -0,0 +1,106 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/ +.idea/ diff --git a/package/AUTHORS.rst b/package/AUTHORS.rst new file mode 100644 index 0000000..37b3dd6 --- /dev/null +++ b/package/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Aaron F. Kurz + +Contributors +------------ + +None yet. Why not be the first? diff --git a/package/CONTRIBUTING.rst b/package/CONTRIBUTING.rst new file mode 100644 index 0000000..e898189 --- /dev/null +++ b/package/CONTRIBUTING.rst @@ -0,0 +1,120 @@ +.. highlight:: shell + +============ +Contributing +============ + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +You can contribute in many ways: + +Types of Contributions +---------------------- + +Report Bugs +~~~~~~~~~~~ + +Report bugs at https://github.com/ics-unisg/aqudem/issues. + +If you are reporting a bug, please include: + +* Your operating system name and version. +* Any details about your local setup that might be helpful in troubleshooting. +* Detailed steps to reproduce the bug. + +Fix Bugs +~~~~~~~~ + +Look through the GitHub issues for bugs. 
Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +Implement Features +~~~~~~~~~~~~~~~~~~ + +Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. + +Write Documentation +~~~~~~~~~~~~~~~~~~~ + +AquDeM could always use more documentation, whether as part of the +official AquDeM docs, in docstrings, or even on the web in blog posts, +articles, and such. + +Submit Feedback +~~~~~~~~~~~~~~~ + +The best way to send feedback is to file an issue at https://github.com/ics-unisg/aqudem/issues. + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome :) + +Get Started! +------------ + +Ready to contribute? Here's how to set up `aqudem` for local development. + +1. Fork the `aqudem` repo on GitHub. +2. Clone your fork locally:: + + $ git clone git@github.com:your_name_here/aqudem.git + +3. Install the requirements using virtualenv (first create a virtualenv):: + + $ pip install -r requirements.txt + +4. Create a branch for local development:: + + $ git checkout -b name-of-your-bugfix-or-feature + + Now you can make your changes locally. + +5. When you're done making changes, check that your changes pass several requirements:: + + $ ./code-check.sh + + To get the necessary tools to execute the checks, run:: + + $ pip install -r requirements_dev.txt + +6. Commit your changes and push your branch to GitHub:: + + $ git add . + $ git commit -m "Your detailed description of your changes." + $ git push origin name-of-your-bugfix-or-feature + +7. Submit a pull request through the GitHub website. + +Pull Request Guidelines +----------------------- + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include tests. +2. 
If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.rst. +3. The pull request should work for Python 3.12. Make sure that the pipeline passes. + +Tips +---- + + + +Deploying +--------- + +A reminder for the maintainers on how to deploy. +Make sure all your changes are committed (including an entry in HISTORY.rst). +Then run:: + +$ bump2version patch # possible: major / minor / patch +$ git push +$ git push --tags + +Travis will then deploy to PyPI if tests pass. diff --git a/package/HISTORY.rst b/package/HISTORY.rst new file mode 100644 index 0000000..24f6b6a --- /dev/null +++ b/package/HISTORY.rst @@ -0,0 +1,8 @@ +======= +History +======= + +0.1.0 (2024-02-08) +------------------ + +* First release on PyPI. diff --git a/package/LICENSE b/package/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/package/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/package/MANIFEST.in b/package/MANIFEST.in new file mode 100644 index 0000000..965b2dd --- /dev/null +++ b/package/MANIFEST.in @@ -0,0 +1,11 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.rst + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/package/Makefile b/package/Makefile new file mode 100644 index 0000000..d2e25bd --- /dev/null +++ b/package/Makefile @@ -0,0 +1,87 @@ +.PHONY: clean clean-build clean-pyc clean-test coverage dist docs help install lint lint/flake8 +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + +lint/flake8: ## check style with flake8 + flake8 aqudem tests + +lint: lint/flake8 ## check style + +test: ## run tests quickly with the default Python + pytest + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run --source aqudem -m pytest + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +docs: ## generate Sphinx HTML documentation, including API docs + rm -f docs/aqudem.rst + rm -f docs/modules.rst + sphinx-apidoc -o docs/ aqudem + $(MAKE) -C docs clean + $(MAKE) -C docs html + $(BROWSER) docs/_build/html/index.html + +servedocs: docs ## compile the docs watching for changes + watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . + +release: dist ## package and upload a release + twine upload dist/* + +dist: clean ## builds source and wheel package + python setup.py sdist + python setup.py bdist_wheel + ls -l dist + +install: clean ## install the package to the active Python's site-packages + python setup.py install diff --git a/package/README.rst b/package/README.rst new file mode 100644 index 0000000..52fb316 --- /dev/null +++ b/package/README.rst @@ -0,0 +1,87 @@ +====== +AquDeM +====== + + +.. image:: https://img.shields.io/pypi/v/aqudem.svg + :target: https://pypi.python.org/pypi/aqudem + +.. image:: https://readthedocs.org/projects/aqudem/badge/?version=latest + :target: https://aqudem.readthedocs.io/en/latest/?version=latest + :alt: Documentation Status + + + +Activity and Sequence Detection Performance Measures: A package to evaluate activity detection results, including the sequence of events given multiple activity types. + +* Documentation: https://aqudem.readthedocs.io. (TODO: not yet active) + +Installation +------------ +.. 
code-block:: bash + + pip install . + +Usage +----- +.. code-block:: python + + import aqudem + + aqu_context = aqudem.Context("ground_truth.xes", + "detected.xes") + + aqu_context.activity_names + aqu_context.case_ids + aqu_context.cross_correlation() + aqu_context.event_analysis(activity_name="Store Workpiece in HBW", case_id="case1") + aqu_context.two_set(activity_name="Store Workpiece in HBW") + aqu_context.levenshtein_distance() + + +For a more detailed description of the available methods, please refer to the rest of the documentation. + +Preface +-------- + +* Measurements and metrics to evaluate activity detection results +* Input: two XES files, one with the ground truth and one with the detection results +* Output: a set of metrics to evaluate the detection results +* Prerequisites for the input files: the XES files must... + + * ... have a ``sampling_freq`` in Hz associated with each case + * ... have a ``concept:name`` attribute for each case + * ... have a ``time:timestamp`` attribute for each event + * ... have an ``concept:name`` attribute for each event (activity name) + * ... have a ``lifecycle:transition`` attribute for each event + * ... each ``start`` event must have a corresponding ``complete`` event; and only these two types of events are relevant for the analysis currently + + +An ACTIVITY_METRIC is a metric that is calculated for each activity type +in each case separately. +Available ACTIVITY_METRICs are: + +* Cross-Correlation +* Event Analysis by `Ward et al. (2011)`_ +* Two Set Metrics by `Ward et al. (2011)`_ + +A SEQUENCE_METRIC is a metric that is calculated for each +case separately. +Available SEQUENCE_METRICs are: + +* Damerau-Levenshtein Distance +* Levenshtein Distance + +For requests that span multiple cases, the results are aggregated. The default and only aggregation method is currently averaging. + +Classifications are specified in the docstrings of the public +metric methods of aqudem.Context. 
+ +Credits +------- + +This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. + +.. _Cookiecutter: https://github.com/audreyr/cookiecutter +.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage +.. _`Ward et al. (2011)`: https://doi.org/10.1145/1889681.1889687 diff --git a/package/aqudem/__init__.py b/package/aqudem/__init__.py new file mode 100644 index 0000000..f9d0345 --- /dev/null +++ b/package/aqudem/__init__.py @@ -0,0 +1,21 @@ +"""Top-level package for AquDeM.""" +from .aqudem import Context +from .event_analysis_helper import EventAnalysis +from .two_set_helper import TwoSet +from .utils import (XESMissingTraceNameAttribute,XESMissingSamplingFreqError, + XESMissingTimestamp, XESMissingActivityName, XESMissingActivityInstance, + XESIncorrectLifecycleTransitionError) + +__author__ = """Aaron F. Kurz""" +__email__ = 'aaron.kurz@unisg.ch' +__version__ = '0.1.0' + +__all__ = ["Context", + "EventAnalysis", + "TwoSet", + "XESMissingTraceNameAttribute", + "XESMissingSamplingFreqError", + "XESMissingTimestamp", + "XESMissingActivityName", + "XESMissingActivityInstance", + "XESIncorrectLifecycleTransitionError"] diff --git a/package/aqudem/aqudem.py b/package/aqudem/aqudem.py new file mode 100644 index 0000000..4ff53d8 --- /dev/null +++ b/package/aqudem/aqudem.py @@ -0,0 +1,226 @@ +"""Main module.""" +from functools import cached_property, lru_cache +from typing import Tuple, Union +import pm4py # type: ignore +import static_frame as sf +from .two_set_helper import TwoSet +from .utils import (_validate_xes_dataframe_before_processing, _validate_activity_name, + _validate_case_id, _determine_start_end_per_case, + _remove_events_with_length_zero, _validate_xes_dataframe_after_processing) +from .damerau_levenshtein_helper import (_damerau_levenshtein_distancy_by_case, + _levenshtein_distancy_by_case) +from .cross_correlation_helper import _cross_correlation_by_activity_case +from 
.two_set_helper import _two_set_by_activity_case +from .event_analysis_helper import (_event_analysis_by_activity_case, EventAnalysis) + + +class Context: + """Class that offers main functionality of AquDeM.""" + + def __init__(self, ground_truth: str, detected: str): + """Constructor of AquDeMContext. + + Both files are expected to be in the XES format, with special constraints: + - The log must have an attribute specifying the sampling frequency in hertz + (key: "sampling_freq") on the trace level. + - Must use the concept:name, concept:instance, + lifecycle:transition and time:timestamp standard extensions. + - Each activity instance must have an event with at least + the lifecycle transitions start and complete. + - In one case, the same activity can only be executed once at a time. + + An ACTIVITY_METRIC is a metric that is calculated for each activity type + in each case separately. + For requests that span multiple activities and/or cases, the results + are aggregated. + A SEQUENCE_METRIC is a metric that is calculated for each + case separately. + For requests that span multiple cases, the results are aggregated. + Classifications are specified in the docstrings of the public + metric methods of aqudem.Context. + :param str ground_truth: The ground truth log file path. + :param str detected: The detected log file path. + :return: An instance of AquDeMContext. 
+ :rtype: AquDeMContext + """ + base_gt = sf.FrameHE.from_pandas( + pm4py.read_xes(ground_truth).sort_values(by="time:timestamp")) + base_det = sf.FrameHE.from_pandas( + pm4py.read_xes(detected).sort_values(by="time:timestamp")) + _validate_xes_dataframe_before_processing(base_gt, base_det) + self._ground_truth = _remove_events_with_length_zero(base_gt).relabel(sf.IndexAutoFactory) + self._detected = _remove_events_with_length_zero(base_det).relabel(sf.IndexAutoFactory) + _validate_xes_dataframe_after_processing(self._ground_truth, self._detected) + self._sampling_freq: float = self._ground_truth["case:sampling_freq"].iloc[0] + self._start_end_per_case = _determine_start_end_per_case(self._ground_truth, self._detected) + + @property + def ground_truth(self) -> sf.FrameHE: + """Get the ground truth log.""" + return self._ground_truth + + @property + def detected(self) -> sf.FrameHE: + """Get the detected log.""" + return self._detected + + @property + def sampling_freq(self) -> float: + """Get the sampling frequency of the logs.""" + return self._sampling_freq + + @cached_property + def activity_names(self) -> dict[str, list[str]]: + """Extract all the available activity names from the XES logs. + + :return: A dictionary with "ground_truth" and "detected" keys, each + containing a list of activity names. + """ + return { + "ground_truth": list(set(self._ground_truth["concept:name"].values)), + "detected": list(set(self._detected["concept:name"].values)) + } + + @cached_property + def case_ids(self) -> dict[str, list[str]]: + """Extract all the available case IDs from the XES logs. + + :return: A dictionary with "ground_truth" and "detected" keys, each + containing a list of case IDs. 
+ """ + return { + "ground_truth": list(set(self._ground_truth["case:concept:name"].values)), + "detected": list(set(self._detected["case:concept:name"].values)) + } + + @lru_cache(maxsize=20) + def cross_correlation(self, + activity_name: str = "*", + case_id: str = "*") -> Tuple[float, float]: + """Calculate the cross-correlation between the ground truth and detected logs. + + ACTIVITY_METRIC + :param activity_name: The name of the activity to calculate the cross-correlation for. + If "*" is passed, the cross-correlation will be calculated and averaged for all + activities. + :param case_id: The case ID to calculate the cross-correlation for. + If "*" is passed, the cross-correlation will be calculated and averaged for all + case IDs. + :return: Tuple; first element: cross-correlation value, between 0 and 1. + second element: relative shift to achieve maximum cross correlation. + """ + _validate_activity_name(self._ground_truth, + self._detected, + activity_name) + _validate_case_id(self._ground_truth, + self._detected, + case_id) + return _cross_correlation_by_activity_case(self._ground_truth, + self._detected, + self._sampling_freq, + activity_name, + case_id, + self._start_end_per_case) + + @lru_cache(maxsize=20) + def two_set(self, activity_name: str = "*", case_id: str = "*") -> TwoSet: + """Calculate the 2SET metrics for a given activity. Absolute values. + + ACTIVITY_METRIC + With the possibility to average over activities and cases. + Includes the absolute and rate metrics, for details see the + TwoSet class documentation. + For more info on the metrics, see: + See J. A. Ward, P. Lukowicz, and H. W. Gellersen, “Performance metrics for + activity recognition,” ACM Trans. Intell. Syst. Technol., vol. 2, no. 1, pp. 1–23, + Jan. 2011, doi: 10.1145/1889681.1889687.; 4.1.2 + :param activity_name: The name of the activity to calculate the two-set metrics for. + If "*" is passed, the two-set metrics will be calculated + and aggregated for all activities. 
+ :param case_id: The case ID to calculate the two-set metrics for. + If "*" is passed, the two-set metrics will be calculated and + aggregated for all case IDs. + :return: A data class with the 2SET metrics. + """ + _validate_activity_name(self._ground_truth, + self._detected, + activity_name) + _validate_case_id(self._ground_truth, + self._detected, + case_id) + return _two_set_by_activity_case(self._ground_truth, + self._detected, + self._sampling_freq, + activity_name, + case_id, + self._start_end_per_case) + + + @lru_cache(maxsize=20) + def event_analysis(self, activity_name: str = "*", case_id: str = "*") -> EventAnalysis: + """Calculate the EA metrics. + + ACTIVITY_METRIC + With the possibility to average over activities and cases. + Includes the absolute and rate metrics, for details see the + EventAnalysis class documentation. + For more info on the metrics, see: + See J. A. Ward, P. Lukowicz, and H. W. Gellersen, “Performance metrics for + activity recognition,” ACM Trans. Intell. Syst. Technol., vol. 2, no. 1, pp. 1–23, + Jan. 2011, doi: 10.1145/1889681.1889687.; 4.2 + :param activity_name: The name of the activity to calculate the event analysis metrics for. + If "*" is passed, the metrics will be calculated + and aggregated for all activities. + :param case_id: The case ID to calculate the event analysis metrics for. + If "*" is passed, the metrics will be calculated and + aggregated for all case IDs. + :return: A data class with the EAD metrics. + """ + _validate_activity_name(self._ground_truth, + self._detected, + activity_name) + _validate_case_id(self._ground_truth, + self._detected, + case_id) + return _event_analysis_by_activity_case(self._ground_truth, + self._detected, + activity_name, + case_id, + self._start_end_per_case) + + @lru_cache(maxsize=20) + def damerau_levenshtein_distance(self, case_id: str = "*") -> Tuple[Union[float, int], float]: + """Calculate the Damerau-Levenshtein distance between the ground truth and + detected logs. 
+ + Calculates both the absolute distance and the normalized distance. + SEQUENCE_METRIC + Order of activities based on start timestamps. + :param case_id: The case ID to calculate the Damerau-Levenshtein distance for. + If "*" is passed, the Damerau-Levenshtein distance will be calculated and + averaged for all case IDs. + :return: The Damerau-Levenshtein distance; tuple. + The first value in the tuple represents the (average) absolute distance. + The second value in the tuple represents the (average) normalized distance. + """ + _validate_case_id(self._ground_truth, self._detected, case_id) + return _damerau_levenshtein_distancy_by_case( + self._ground_truth, self._detected, case_id) + + @lru_cache(maxsize=20) + def levenshtein_distance(self, case_id: str = "*") -> Tuple[Union[float, int], float]: + """Calculate the Levenshtein distance between the ground truth and detected logs. + + Calculates both the absolute distance and the normalized distance. + SEQUENCE_METRIC + Order of activities based on start timestamps. + :param case_id: The case ID to calculate the Levenshtein distance for. + If "*" is passed, the Levenshtein distance will be + calculated and averaged for all case IDs. + :return: The Levenshtein distance; tuple. + The first value in the tuple represents the (average) absolute distance. + The second value in the tuple represents the (average) normalized distance. + """ + _validate_case_id(self._ground_truth, self._detected, case_id) + return _levenshtein_distancy_by_case( + self._ground_truth, self._detected, case_id) diff --git a/package/aqudem/cross_correlation_helper.py b/package/aqudem/cross_correlation_helper.py new file mode 100644 index 0000000..0dfcbfe --- /dev/null +++ b/package/aqudem/cross_correlation_helper.py @@ -0,0 +1,128 @@ +""" This module contains the cross correlation helper functions. 
""" +import math +from datetime import datetime, timedelta +from typing import List, Tuple, Any +import static_frame as sf +import numpy as np +from .utils import _generate_activity_metric_list + + +# pylint: disable=too-many-arguments +def _cross_correlation_by_activity_case(ground_truth: sf.FrameHE, + detected: sf.FrameHE, + sampling_freq: float, + activity_name: str, + case_id: str, + start_end_per_case: \ + sf.SeriesHE[sf.Index[np.str_], Any]) \ + -> Tuple[float, float]: + """Calculate the cross correlation between the ground truth and detected logs. + + :param ground_truth: The ground truth log. + :param detected: The detected log. + :param sampling_freq: The sampling frequency of the logs, in Hertz. + :param activity_name: The name of the activity. + If "*" is passed, the cross-correlation will be calculated and averaged for all activities. + :param case_id: The case ID. + If "*" is passed, the cross-correlation will be calculated and averaged for all case IDs. + :param start_end_per_case: The start and end times for each case. + :return: The cross correlation. 
+ """ + cross_correlations = _generate_activity_metric_list(gt=ground_truth, + det=detected, + case_id=case_id, + activity_name=activity_name, + start_end_per_case=start_end_per_case, + metric=_cross_correlation, + sampling_freq=sampling_freq) + return (round(sum(cc[0] for cc in cross_correlations) / len(cross_correlations), 4), + round(sum(cc[1] for cc in cross_correlations) / len(cross_correlations), 4)) + + +def _cross_correlation(gt: sf.FrameHE, + det: sf.FrameHE, + start_end_per_case: sf.SeriesHE[sf.Index[np.str_], Any], + sampling_rate_hz: float) -> Tuple[float, float]: + """ + Relative shift > 0 -> the detected activity timeseries had + to be pushed forward in time (time delay/detected later than actual) + Relative shift < 0 -> the detected activity timeseries had + to be pulled back (time advance/detected earlier than actual) + + Assume + :param gt: Ground truth log + :param det: Detected log + :param start_end_per_case: Start and end times for all cases + :param sampling_rate_hz: Sampling rate in Hz + :return: cross correlation and relative shift necessary for max cc + """ + if len(gt) == 0 or len(det) == 0: + return 0.0, 0.0 + time_series_bin_gt, time_series_bin_det = _get_timeseries_format(gt, + det, + sampling_rate_hz, + start_end_per_case) + a = np.array(time_series_bin_gt) + b = np.array(time_series_bin_det) + if len(a) != len(b): + raise ValueError("Time series must have same length.") + normalization_factor = len(a) + c = np.correlate(a, b, mode='full') + c_max = c.max() + c_list = list(c) + if c_max != 0.0: + c_ind = c_list.index(c_max) + shift_total = c_ind - math.floor(len(c_list) / 2) + shift_relative = shift_total / math.floor(len(c_list) / 2) + else: + shift_relative = 0.0 + return round(c_max / normalization_factor, 2), round(shift_relative, 2) + + +def _get_timeseries_format(gt: sf.FrameHE, + det: sf.FrameHE, + sampling_rate_hz: float, + start_end_per_case: sf.SeriesHE[sf.Index[np.str_], Any], ) \ + -> Tuple[List[int], List[int]]: + """ + 
Turns a list of ground truth and detected activity + instances into a time series when the activity is running and + when not. All activities must be of same type (have same name). + :param gt: + :param det: + :param sampling_rate_hz: + :return: + """ + case_id = gt["case:concept:name"].iloc[0] + sorted_gt = gt.sort_values("time:timestamp") + sorted_det = det.sort_values("time:timestamp") + min_start: datetime = start_end_per_case[case_id][0] + max_stop: datetime = start_end_per_case[case_id][1] + time_step = timedelta(seconds=1) / sampling_rate_hz + timeseries_gt = [] + timeseries_det = [] + + current_time = min_start + while current_time <= max_stop: + rel_rows_gt = sorted_gt.loc[(sorted_gt["time:timestamp"] >= current_time)] + if len(rel_rows_gt) > 0 and (rel_rows_gt["lifecycle:transition"]. + values[0] == "complete" + or (rel_rows_gt["time:timestamp"] + .values[0] == current_time and + rel_rows_gt["lifecycle:transition"] + .values[0] == "start")): + timeseries_gt.append(1) + else: + timeseries_gt.append(-1) + rel_rows_det = sorted_det.loc[(sorted_det["time:timestamp"] >= current_time)] + if len(rel_rows_det) > 0 and (rel_rows_det["lifecycle:transition"] + .values[0] == "complete" + or (rel_rows_det["time:timestamp"] + .values[0] == current_time and + rel_rows_det["lifecycle:transition"] + .values[0] == "start")): + timeseries_det.append(1) + else: + timeseries_det.append(-1) + current_time += time_step + return timeseries_gt, timeseries_det diff --git a/package/aqudem/damerau_levenshtein_helper.py b/package/aqudem/damerau_levenshtein_helper.py new file mode 100644 index 0000000..e9e8441 --- /dev/null +++ b/package/aqudem/damerau_levenshtein_helper.py @@ -0,0 +1,109 @@ +"""Functionality for the calculation of the Damerau-Levenshtein distance.""" +from functools import partial +from typing import List, Union, Tuple +import static_frame as sf +import textdistance +from .utils import (_case_level_metric_pre_check, BUG_REPORT_CTA, + _generate_sequence_metric_list) + 
+ +def _map_strings_to_letters(list1: List[str], + list2: Union[List[str], None] = None) -> Tuple[str, str]: + """Create two strings where each letter represents a unique string from the list of strings. + + Example: + (["i", "think", "i", "am", "think", "think", "am", "i"], + ["therefore", "therefore", "am", "i", "i", "am", "therefore", "am"]) + -> ("ABACBBCA", "DDCAACDC") + :param [str] list1: The first list of strings. + :param [str] list2: The second list of strings. + :return: + """ + # Combine the two lists if the second list is provided + all_strings = list1 + (list2 or []) + + # Create a mapping between unique strings and letters + unique_strings = sorted(set(all_strings), key=all_strings.index) + letter_mapping = {string: chr(ord('A') + i) for i, string in enumerate(unique_strings)} + + # Map strings to letters for the first list + result1 = ''.join(letter_mapping[string] for string in list1) + + # Map strings to letters for the second list (if provided) + result2 = '' + if list2 is not None: + result2 = ''.join(letter_mapping[string] for string in list2) + + return result1, result2 + + +def _damerau_levenshtein_distancy_by_case(gt: sf.FrameHE, + det: sf.FrameHE, + case_id: str) -> Tuple[Union[int, float], float]: + """Calculate the Damerau-Levenshtein distance between the ground truth and detected logs. + + :param gt: The ground truth log. + :param det: The detected log. + :param case_id: The case ID to calculate the Damerau-Levenshtein distance for. + If "*" is passed, the Damerau-Levenshtein distance will be calculated and averaged for all + case IDs. + :return: The Damerau-Levenshtein distance and the normalized Damerau-Levenshtein distance. 
+ """ + all_d_l_distances = _generate_sequence_metric_list(gt, det, case_id, + partial(_damerau_opt_levenshtein_dist, + metr_type="dam_lev")) + + return (round(sum(dl[0] for dl in all_d_l_distances) / len(all_d_l_distances), 4), + round(sum(dl[1] for dl in all_d_l_distances) / len(all_d_l_distances), 4)) + + +def _levenshtein_distancy_by_case(gt: sf.FrameHE, + det: sf.FrameHE, + case_id: str) -> Tuple[Union[int, float], float]: + """Calculate the Levenshtein distance between the ground truth and detected logs. + + :param gt: The ground truth log. + :param det: The detected log. + :param case_id: The case ID to calculate the Levenshtein distance for. + If "*" is passed, the Levenshtein distance will be calculated and averaged for all + case IDs. + :return: The Levenshtein distance and the normalized Levenshtein distance. + """ + all_d_l_distances = _generate_sequence_metric_list(gt, det, case_id, + partial(_damerau_opt_levenshtein_dist, + metr_type="lev")) + + return (round(sum(dl[0] for dl in all_d_l_distances) / len(all_d_l_distances), 4), + round(sum(dl[1] for dl in all_d_l_distances) / len(all_d_l_distances), 4)) + + +def _damerau_opt_levenshtein_dist(gt: sf.FrameHE, + det: sf.FrameHE, + metr_type: str = "dam_lev") -> Tuple[int, float]: + """Calculate the (normalized) (Damerau-)Levenshtein distance + between the ground truth and detected logs. + + Sorted based on start time. + Assume the logs are filtered by case. + :param gt: The ground truth log. Filtered by case_id. + :param det: The detected log. Filtered by case_id. + :return: Tuple of Damerau-Levenshtein distance and normed version. 
+ """ + _case_level_metric_pre_check(gt, det) + gt_start_event_fr = (gt.loc[gt["lifecycle:transition"] == "start"] + .sort_values("lifecycle:transition")) + dt_start_event_fr = (det.loc[det["lifecycle:transition"] == "start"] + .sort_values("lifecycle:transition")) + gt_activity_names = list(gt_start_event_fr["concept:name"].values) + dt_activity_names = list(dt_start_event_fr["concept:name"].values) + gt_string, dt_string = _map_strings_to_letters(gt_activity_names, dt_activity_names) + max_len = max(len(gt_string), len(dt_string)) + if max_len == 0: + return (0, 0.0) + if metr_type == "lev": + return (textdistance.levenshtein(gt_string, dt_string), + textdistance.levenshtein(gt_string, dt_string) / max_len) + if metr_type == "dam_lev": + return (textdistance.damerau_levenshtein(gt_string, dt_string), + textdistance.damerau_levenshtein(gt_string, dt_string) / max_len) + raise ValueError(f"Type must be 'dam_lev' or 'lev'. {BUG_REPORT_CTA}") diff --git a/package/aqudem/event_analysis_helper.py b/package/aqudem/event_analysis_helper.py new file mode 100644 index 0000000..d9671a3 --- /dev/null +++ b/package/aqudem/event_analysis_helper.py @@ -0,0 +1,346 @@ +""" Helper functions to calculate the Event Analysis (EA) metrics. """ +#pylint: disable=too-many-statements, too-many-locals, too-many-branches +from dataclasses import dataclass, fields +from collections import Counter +from functools import cached_property, lru_cache +from typing import Union, Any, Dict +import numpy as np +import static_frame as sf + +from .utils import (_case_activity_level_metric_pre_check, BUG_REPORT_CTA, + _generate_activity_metric_list, _get_case_in_filtered_logs) +from .ward_helper import _generate_segment_scores + + +# pylint: disable=too-many-instance-attributes +@dataclass(frozen=True) +class EventAnalysis: + """Data class to hold the EA metrics. + + If result of aggregated request, the values represent the average number of events + over the relevant log-activity pairs. 
+ Regarding the ground truth events: + d: int, Deletions + f: int, Fragmentations + fm: int, Fragmentation and merge + m: int, Merges + Regarding both the ground truth and detected events: + c: int, Correct + Regarding the (d)etected events: + md: int, Merges + fmd: int, Fragmentation and merge + fd: int, Fragmentations + id: int, Insertions + """ + d: Union[int, float] + f: Union[int, float] + fm: Union[int, float] + m: Union[int, float] + c: Union[int, float] + md: Union[int, float] + fmd: Union[int, float] + fd: Union[int, float] + id: Union[int, float] + + @cached_property + def total_gt_events(self) -> Union[int, float]: + """Get the total number of ground truth events.""" + return round(self.d + self.f + self.fm + self.m + (self.c / 2), 4) + + @cached_property + def total_det_events(self) -> Union[int, float]: + """Get the total number of detected events.""" + return round((self.c / 2) + self.md + self.fmd + self.fd + self.id, 4) + + @cached_property + def correct_events_per_log(self) -> Union[int, float]: + """Get the total number of correct events per log.""" + return round(self.c / 2, 4) + + @cached_property + def dr(self) -> float: + """Get the deletion rate. + Ratio of deletions to total ground truth events.""" + return (round(self.d / self.total_gt_events, 4) + if self.total_gt_events > 0 + else 0) + + @cached_property + def fr(self) -> float: + """Get the fragmentation rate. + Ratio of fragmentations to total ground truth events.""" + return (round(self.f / self.total_gt_events, 4) + if self.total_gt_events > 0 + else 0) + + @cached_property + def fmr(self) -> float: + """Get the fragmentation and merge rate. + Ratio of fragmentation and merge to total ground truth events.""" + return (round(self.fm / self.total_gt_events, 4) + if self.total_gt_events > 0 + else 0) + + @cached_property + def mr(self) -> float: + """Get the merge rate. 
+ Ratio of merges to total ground truth events.""" + return (round(self.m / self.total_gt_events, 4) + if self.total_gt_events > 0 + else 0) + + @cached_property + def cr_gt(self) -> float: + """Get the correct rate. + Ratio of correct events per log to total ground truth events.""" + return (round(self.correct_events_per_log / self.total_gt_events, 4) + if self.total_gt_events > 0 + else 0) + + @cached_property + def mdr(self) -> float: + """Get the merging rate. + Ratio of merging to total detected events.""" + return (round(self.md / self.total_det_events, 4) + if self.total_det_events > 0 + else 0) + + @cached_property + def fmdr(self) -> float: + """Get the fragmentating and merging rate. + Ratio of fragmentating and merging to total detected events.""" + return (round(self.fmd / self.total_det_events, 4) + if self.total_det_events > 0 + else 0) + + @cached_property + def fdr(self) -> float: + """Get the fragmentating rate. + Ratio of fragmentating to total detected events.""" + return (round(self.fd / self.total_det_events, 4) + if self.total_det_events > 0 + else 0) + + @cached_property + def idr(self) -> float: + """Get the insertion rate. + Ratio of insertion to total detected events.""" + return (round(self.id / self.total_det_events, 4) + if self.total_det_events > 0 + else 0) + + @cached_property + def cr_det(self) -> float: + """Get the correct rate. + Ratio of correct events per log to total detected events.""" + return (round(self.correct_events_per_log / self.total_det_events, 4) + if self.total_det_events > 0 + else 0) + + +@lru_cache +def _event_analysis(gt: sf.FrameHE, + det: sf.FrameHE, + start_end_per_case: sf.SeriesHE[sf.Index[np.str_], Any], + _: float = -1.0) -> EventAnalysis: + """Calculate the absolute EA metrics. + + Assume that the logs are filtered by activity and case id. + :param sf.FrameHE gt: The ground truth event log. + :param sf.FrameHE det: The detected event log. + :param start_end_by_case: The start and end times of the cases. 
+ :return: The absolute EA metrics. + """ + case_id = _get_case_in_filtered_logs(gt, det) + _case_activity_level_metric_pre_check(gt, det) + segment_scores = _generate_segment_scores(gt, + det, + start_end_per_case[case_id][0], + start_end_per_case[case_id][1]) + proc_gt_rows = [] + proc_det_rows = [] + final_gt_rows = [] + final_det_rows = [] + for i in range(0, len(gt), 2): + if (gt.iloc[i]["lifecycle:transition"] != "start" + or gt.iloc[i + 1]["lifecycle:transition"] != "complete"): + raise ValueError(f"Invalid log row ordering. {BUG_REPORT_CTA}") + event_start = gt.iloc[i]["time:timestamp"] + event_end = gt.iloc[i + 1]["time:timestamp"] + proc_gt_rows.append({ + "start": event_start, + "end": event_end, + "types": [] + }) + for i in range(0, len(det), 2): + if (det.iloc[i]["lifecycle:transition"] != "start" + or det.iloc[i + 1]["lifecycle:transition"] != "complete"): + raise ValueError(f"Invalid log row ordering. {BUG_REPORT_CTA}") + event_start = det.iloc[i]["time:timestamp"] + event_end = det.iloc[i + 1]["time:timestamp"] + proc_det_rows.append({ + "start": event_start, + "end": event_end, + "types": [] + }) + for gt_event in proc_gt_rows: + contained_segment_scores = segment_scores.loc[ + (segment_scores["start"] >= gt_event["start"]) + & (segment_scores["end"] <= gt_event["end"])] + equal_segment_scores = segment_scores.loc[ + (segment_scores["start"] == gt_event["start"]) + & (segment_scores["end"] == gt_event["end"])] + if (len(equal_segment_scores) == 1 + and equal_segment_scores.iloc[0]["type"] == "D"): + gt_event["types"].append("D") + elif len(contained_segment_scores.loc[contained_segment_scores["type"] == "F"]) > 0: + gt_event["types"].append("F") + for det_event in proc_det_rows: + contained_segment_scores = segment_scores.loc[ + (segment_scores["start"] >= det_event["start"]) + & (segment_scores["end"] <= det_event["end"])] + equal_segment_scores = segment_scores.loc[ + (segment_scores["start"] == det_event["start"]) + & (segment_scores["end"] == 
det_event["end"])] + if (len(equal_segment_scores) == 1 + and equal_segment_scores.iloc[0]["type"] == "I"): + det_event["types"].append("I'") + elif len(contained_segment_scores.loc[contained_segment_scores["type"] == "M"]) > 0: + det_event["types"].append("M'") + for gt_event in proc_gt_rows: + overlapping_det_events = [det_event for det_event in proc_det_rows + if (gt_event["end"] >= det_event["start"] >= gt_event["start"]) + or (gt_event["start"] <= det_event["end"] <= gt_event["end"]) + or ((det_event["start"] <= gt_event["start"] <= det_event["end"]) + and (det_event["start"] <= gt_event["end"] <= det_event["end"]))] + if any("M'" in det_event["types"] for det_event in overlapping_det_events): + gt_event["types"].append("M") + for det_event in proc_det_rows: + overlapping_gt_events = [gt_event for gt_event in proc_gt_rows + if (det_event["end"] >= gt_event["start"] >= det_event["start"]) + or (det_event["start"] <= gt_event["end"] <= det_event["end"]) + or ((gt_event["start"] <= det_event["start"] <= gt_event["end"]) + and (gt_event["start"] <= det_event["end"] <= gt_event["end"]))] + if any("F" in gt_event["types"] for gt_event in overlapping_gt_events): + det_event["types"].append("F'") + for gt_event in proc_gt_rows: + if len(gt_event["types"]) == 0: + final_gt_rows.append({ + "start": gt_event["start"], + "end": gt_event["end"], + "type": "C" + }) + elif "D" in gt_event["types"]: + final_gt_rows.append({ + "start": gt_event["start"], + "end": gt_event["end"], + "type": "D" + }) + elif "F" in gt_event["types"] and "M" in gt_event["types"]: + final_gt_rows.append({ + "start": gt_event["start"], + "end": gt_event["end"], + "type": "FM" + }) + elif "F" in gt_event["types"]: + final_gt_rows.append({ + "start": gt_event["start"], + "end": gt_event["end"], + "type": "F" + }) + elif "M" in gt_event["types"]: + final_gt_rows.append({ + "start": gt_event["start"], + "end": gt_event["end"], + "type": "M" + }) + for det_event in proc_det_rows: + if 
len(det_event["types"]) == 0: + final_det_rows.append({ + "start": det_event["start"], + "end": det_event["end"], + "type": "C" + }) + elif "I'" in det_event["types"]: + final_det_rows.append({ + "start": det_event["start"], + "end": det_event["end"], + "type": "I'" + }) + elif "F'" in det_event["types"] and "M'" in det_event["types"]: + final_det_rows.append({ + "start": det_event["start"], + "end": det_event["end"], + "type": "FM'" + }) + elif "F'" in det_event["types"]: + final_det_rows.append({ + "start": det_event["start"], + "end": det_event["end"], + "type": "F'" + }) + elif "M'" in det_event["types"]: + final_det_rows.append({ + "start": det_event["start"], + "end": det_event["end"], + "type": "M'" + }) + gt_types = Counter(row["type"] for row in final_gt_rows) + det_types = Counter(row["type"] for row in final_det_rows) + if gt_types["C"] != det_types["C"]: + raise ValueError(f"The number of correct events in the ground truth and detected logs " + f"must be equal. {BUG_REPORT_CTA}") + return EventAnalysis( + d=gt_types["D"], + f=gt_types["F"], + fm=gt_types["FM"], + m=gt_types["M"], + c=gt_types["C"] + det_types["C"], + md=det_types["M'"], + fmd=det_types["FM'"], + fd=det_types["F'"], + id=det_types["I'"] + ) + + +def _event_analysis_by_activity_case(gt: sf.FrameHE, + det: sf.FrameHE, + activity: str, + case_id: str, + start_end_per_case: \ + sf.SeriesHE[sf.Index[np.str_], Any]) \ + -> EventAnalysis: + """Calculate the absolute EA metrics for a given activity and case id. + + :param sf.FrameHE gt: The ground truth event log. + :param sf.FrameHE det: The detected event log. + :param str activity: The activity name. + If "*" is passed, the EA rates will be calculated and averaged for all activities. + :param str case_id: The case id. + If "*" is passed, the EA rates will be calculated and averaged for all cases. + :param start_end_per_case: The start and end times of the cases. + :return: The absolute EA metrics. 
+ """ + all_metrics = _generate_activity_metric_list(gt=gt, + det=det, + case_id=case_id, + activity_name=activity, + start_end_per_case=start_end_per_case, + metric=_event_analysis) + avg_ea_dict: Dict[str, Union[float, int]]= { + "d": 0, + "f": 0, + "fm": 0, + "m": 0, + "c": 0, + "md": 0, + "fmd": 0, + "fd": 0, + "id": 0 + } + for ea_metr in all_metrics: + for field in fields(EventAnalysis): + avg_ea_dict[field.name] += + getattr(ea_metr, field.name) + for field in fields(EventAnalysis): + avg_ea_dict[field.name] = round(avg_ea_dict[field.name] / len(all_metrics), 4) + return EventAnalysis(**avg_ea_dict) diff --git a/package/aqudem/two_set_helper.py b/package/aqudem/two_set_helper.py new file mode 100644 index 0000000..fe2feed --- /dev/null +++ b/package/aqudem/two_set_helper.py @@ -0,0 +1,237 @@ +"""Module to calculate the 2SET metrics.""" +from dataclasses import dataclass, fields +from datetime import timedelta +from functools import cached_property, lru_cache +from typing import Union, Any, Dict +import numpy as np +import static_frame as sf +from .ward_helper import _generate_segment_scores +from .utils import (_case_activity_level_metric_pre_check, + _generate_activity_metric_list, BUG_REPORT_CTA, _get_case_in_filtered_logs) + + +# pylint: disable=too-many-instance-attributes +@dataclass(frozen=True) +class TwoSet: + """Data class to hold the absolute 2SET metrics. 
+ + tp: int, True Positives + tn: int, True Negatives + d: int, Deletions + f: int, Fragmentations + ua: int, Underfullings (at the start) + uo: int, Underfullings (at the end) + i: int, Insertions + m: int, Merges + oa: int, Overfullings (at the start) + oo: int, Overfullings (at the end) + """ + tp: Union[int, float] + tn: Union[int, float] + d: Union[int, float] + f: Union[int, float] + ua: Union[int, float] + uo: Union[int, float] + i: Union[int, float] + m: Union[int, float] + oa: Union[int, float] + oo: Union[int, float] + + @cached_property + def p(self) -> Union[int, float]: + """Get total positive frames.""" + return self.tp + self.d + self.f + self.ua + self.uo + + @cached_property + def n(self) -> Union[int, float]: + """Get total negative frames.""" + return self.tn + self.i + self.m + self.oa + self.oo + + @cached_property + def t(self) -> Union[int, float]: + """Get total frames.""" + return (self.tp + self.tn + self.d + self.f + self.ua + + self.uo + self.i + self.m + self.oa + self.oo) + + @cached_property + def tpr(self) -> float: + """Get the True Positive Rate. + Ratio of true positives to total positives.""" + return round(self.tp / self.p if self.p != 0 else 0, 4) + + @cached_property + def tnr(self) -> float: + """Get the True Negative Rate. + Ratio of true negatives to total negatives.""" + return round(self.tn / self.n if self.n != 0 else 0, 4) + + @cached_property + def dr(self) -> float: + """Get the Deletion Rate. + Ratio of deletions to total positives.""" + return round(self.d / self.p if self.p != 0 else 0, 4) + + @cached_property + def fr(self) -> float: + """Get the Fragmentation Rate. + Ratio of fragmentations to total positives.""" + return round(self.f / self.p if self.p != 0 else 0, 4) + + @cached_property + def uar(self) -> float: + """Get the Underfilling Rate (at the start). 
+ Ratio of underfullings at the start to total positives.""" + return round(self.ua / self.p if self.p != 0 else 0, 4) + + @cached_property + def uor(self) -> float: + """Get the Underfilling Rate (at the end). + Ratio of underfullings at the end to total positives.""" + return round(self.uo / self.p if self.p != 0 else 0, 4) + + @cached_property + def ir(self) -> float: + """Get the Insertion Rate. + Ratio of insertions to total negatives.""" + return round(self.i / self.n if self.n != 0 else 0, 4) + + @cached_property + def mr(self) -> float: + """Get the Merge Rate. + Ratio of merges to total negatives.""" + return round(self.m / self.n if self.n != 0 else 0, 4) + + @cached_property + def oar(self) -> float: + """Get the Overfilling Rate (at the start). + Ratio of overfullings at the start to total negatives.""" + return round(self.oa / self.n if self.n != 0 else 0, 4) + + @cached_property + def oor(self) -> float: + """Get the Overfilling Rate (at the end). + Ratio of overfullings at the end to total negatives.""" + return round(self.oo / self.n if self.n != 0 else 0, 4) + + +# pylint: disable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements +@lru_cache +def _two_set(gt: sf.FrameHE, + det: sf.FrameHE, + start_end_by_case: sf.SeriesHE[sf.Index[np.str_], Any], + sampling_freq: float,) -> TwoSet: + """Calculate the absolute 2SET metrics. + + Assume that logs are filtered by case and activity. + :param gt: The ground truth DataFrame. + :param det: The detected DataFrame. + :param sampling_freq: The sampling frequency of the logs, in Hz. + :param start_end_by_case: The start and end times for each case. + :return: The absolute 2SET metrics. 
+ """ + case_id = _get_case_in_filtered_logs(gt, det) + _case_activity_level_metric_pre_check(gt, det) + sampling_timedelta = timedelta(seconds=1 / sampling_freq) + segment_scores_frame = _generate_segment_scores(gt, + det, + start_end_by_case[case_id][0], + start_end_by_case[case_id][1]) + sorted_segments_scores_frame = segment_scores_frame.sort_values("start") + # initialize the counters + tp = 0 + tn = 0 + d = 0 + f = 0 + ua = 0 + uo = 0 + i = 0 + m = 0 + oa = 0 + oo = 0 + # iterate through the time represented in the segment df + current_time = sorted_segments_scores_frame.iloc[0]["start"] + while True: + current_segment = sorted_segments_scores_frame.loc[ + sorted_segments_scores_frame["start"] <= current_time].iloc[-1] + if len(current_segment) == 0: + raise ValueError(f"No segment found for the current time. {BUG_REPORT_CTA}") + if len(current_segment) > 1 and not isinstance(current_segment, sf.Series): + raise ValueError("Multiple segments found for the current time. " + f"{BUG_REPORT_CTA}") + # get the type of the current segment + current_type = current_segment.loc["type"] + # update the counters + if current_type == "TP": + tp += 1 + elif current_type == "TN": + tn += 1 + elif current_type == "D": + d += 1 + elif current_type == "F": + f += 1 + elif current_type == "Ua": + ua += 1 + elif current_type == "Uo": + uo += 1 + elif current_type == "I": + i += 1 + elif current_type == "M": + m += 1 + elif current_type == "Oa": + oa += 1 + elif current_type == "Oo": + oo += 1 + # move to the next time + current_time += sampling_timedelta + # if the current time is larger than the last segment, break the loop + if current_time > sorted_segments_scores_frame.iloc[-1]["end"]: + break + return TwoSet(tp=tp, tn=tn, d=d, f=f, ua=ua, uo=uo, i=i, m=m, oa=oa, oo=oo) + + +def _two_set_by_activity_case(gt: sf.FrameHE, + det: sf.FrameHE, + sampling_freq: float, + activity_name: str, + case_id: str, + start_end_per_case: sf.SeriesHE[sf.Index[np.str_], Any]) \ + -> TwoSet: + 
"""Calculate the absolute 2SET metrics for a given activity and case. + + :param gt: The ground truth log. + :param det: The detected log. + :param activity_name: The name of the activity. + If "*" is passed, the 2SET metrics will be calculated + and averaged for all activities. + :param case_id: The case ID. + If "*" is passed, the 2SET metrics will be calculated + and averaged for all cases. + :param start_end_per_case: The start and end times for each case. + :return: The absolute 2SET metrics. + """ + two_set_metrics = _generate_activity_metric_list(gt=gt, + det=det, + sampling_freq=sampling_freq, + case_id=case_id, + activity_name=activity_name, + start_end_per_case=start_end_per_case, + metric=_two_set) + avg_two_set_dict: Dict[str, Union[float, int]] = { + "tp": 0, + "tn": 0, + "d": 0, + "f": 0, + "ua": 0, + "uo": 0, + "i": 0, + "m": 0, + "oa": 0, + "oo": 0 + } + for two_set_metr in two_set_metrics: + for field in fields(TwoSet): + avg_two_set_dict[field.name] += getattr(two_set_metr, field.name) + for field in fields(TwoSet): + avg_two_set_dict[field.name] = round(avg_two_set_dict[field.name] + / len(two_set_metrics), 4) + return TwoSet(**avg_two_set_dict) diff --git a/package/aqudem/utils.py b/package/aqudem/utils.py new file mode 100644 index 0000000..1d8374e --- /dev/null +++ b/package/aqudem/utils.py @@ -0,0 +1,377 @@ +"""This module contains general utility functionality for the aqudem library.""" +from typing import List, Any, Callable, Dict + +import numpy as np +import static_frame as sf + +BUG_REPORT_CTA = "This is most likely a bug. Please report the issue." 
+ + +class Error(Exception): # will be used as aqudem.Error + """Base class for all of this library's exceptions.""" + + +class XESMissingTraceNameAttribute(Error): + """The trace needs to be named.""" + + +class XESMissingSamplingFreqError(Error): + """The trace is missing the sampling frequency.""" + + +class XESMissingTimestamp(Error): + """An event is missing the timestamp attribute.""" + + +class XESMissingActivityName(Error): + """An event is missing the activity name attribute.""" + + +class XESMissingActivityInstance(Error): + """An event is missing the activity instance attribute.""" + + +class XESIncorrectLifecycleTransitionError(Error): + """The lifecycle transition is not correct.""" + + +def _validate_xes_dataframe_before_processing(xes_ground_truth: sf.FrameHE, + xes_detected: sf.FrameHE) -> None: + """Check if the XES logs are in a valid format. + + :param xes_ground_truth: The ground truth XES log. + :param xes_detected: The detected XES log. + :returns: None""" + if ("case:sampling_freq" not in xes_detected.columns + or xes_detected["case:sampling_freq"].count(unique=True) != 1): + raise XESMissingSamplingFreqError("All XES traces in the log must have " + "a 'sampling_freq' attribute.") + for xes_frame in [xes_ground_truth, xes_detected]: + if ("case:concept:name" not in xes_frame.columns + or xes_frame["case:concept:name"].isna().any()): + raise XESMissingTraceNameAttribute("The trace must have a name.") + if ("lifecycle:transition" not in xes_frame.columns + or not xes_frame["lifecycle:transition"].isin(["complete", "start"]).all() + or (len(xes_frame.loc[xes_frame["lifecycle:transition"] == "start"]) + != len(xes_frame.loc[xes_frame["lifecycle:transition"] == "complete"]))): + raise XESIncorrectLifecycleTransitionError( + "Each activity instance must have an event with a lifecycle transition" + " from start to complete." 
+ ) + if ("time:timestamp" not in xes_frame.columns + or xes_frame["time:timestamp"].isna().any()): + raise XESMissingTimestamp("Each event must have a timestamp attribute.") + if ("concept:name" not in xes_frame.columns + or xes_frame["concept:name"].isna().any()): + raise XESMissingActivityName("Each event must have an activity name attribute.") + + +def _validate_xes_dataframe_after_processing(xes_ground_truth: sf.FrameHE, + xes_detected: sf.FrameHE) -> None: + """ make sure that for each case-activity, + the "lifecycle:transition" is always first 'start', then 'complete' """ + for xes_frame in [xes_ground_truth, xes_detected]: + for case_id in xes_frame["case:concept:name"].unique(): + case = xes_frame.loc[xes_frame["case:concept:name"] == case_id] + for activity_name in case["concept:name"].unique(): + activity = case.loc[case["concept:name"] == activity_name] + current = "complete" + for transition in activity["lifecycle:transition"].values: + if current == "complete" and transition == "start": + current = "start" + elif current == "start" and transition == "complete": + current = "complete" + else: + raise XESIncorrectLifecycleTransitionError( + "Each activity instance must have an event with a lifecycle transition" + " from start to complete." + ) + + +def _validate_activity_name(event_log1_fr: sf.FrameHE, + event_log2_fr: sf.FrameHE, + activity_name: str) -> None: + """Check if the activity name is in the event log. + + :param event_log1_fr: The event log. + :param event_log2_fr: The event log. + :param activity_name: The name of the activity to validate. 
+ :returns: None + """ + if ((activity_name not in event_log1_fr["concept:name"].unique() + and activity_name not in event_log2_fr["concept:name"].unique()) + and activity_name != "*"): + raise ValueError(f"The activity name '{activity_name}' is not in the event logs.") + + +def _validate_case_id(event_log1_fr: sf.FrameHE, + event_log2_fr: sf.FrameHE, + case_id: str) -> None: + """Check if the case ID is in the event log. + + :param event_log1_fr: The event log. + :param event_log2_fr: The event log. + :param str case_id: The case ID to validate. + :returns: None + """ + if ((case_id not in event_log1_fr["case:concept:name"].unique() + or case_id not in event_log2_fr["case:concept:name"].unique()) + and case_id != "*"): + raise ValueError(f"The case ID '{case_id}' is not in the event logs.") + + +def _logs_contain_at_most_one_case_id(event_log_one: sf.FrameHE, + event_log_two: sf.FrameHE) -> bool: + """Check if the event logs contains only one case ID. + + :param event_log_one: The event log. + :param event_log_two: The event log. + :returns: True if the event log contains only one case ID, False otherwise. + :rtype: bool + """ + all_case_names = (list(event_log_one["case:concept:name"].values) + + list(event_log_two["case:concept:name"].values)) + all_case_names = list(set(all_case_names)) + return len(all_case_names) == 0 or len(all_case_names) == 1 + + +def _logs_contain_at_most_one_activity(event_log_one: sf.FrameHE, + event_log_two: sf.FrameHE) -> bool: + """Check if the event logs contain only one activity. + + :param event_log_one: The event log. + :param event_log_two: The event log. + :param str activity_name: The name of the activity to check for. + :returns: True if the event logs contain only one activity, False otherwise. 
+ :rtype: bool + """ + all_activity_names = (list(event_log_one["concept:name"].values) + + list(event_log_two["concept:name"].values)) + all_activity_names = list(set(all_activity_names)) + return len(all_activity_names) == 0 or len(all_activity_names) == 1 + + +def _has_all_required_columns(event_log: sf.FrameHE) -> bool: + """Check if the event log has all required columns. + + The required columns are ["case:concept:name", "concept:name", + "lifecycle:transition", "time:timestamp"]. + :param event_log: The event log . + :returns: True if the event log has all required columns, False otherwise. + :rtype: bool + """ + required_columns = ["case:concept:name", "concept:name", + "lifecycle:transition", "time:timestamp"] + return all(col in list(event_log.columns) for col in required_columns) + + +def _case_activity_level_metric_pre_check(event_log_one: sf.FrameHE, + event_log_two: sf.FrameHE) -> None: + """Check if the event logs are in a valid format for case and activity level metrics. + + :param event_log_one: The event log. + :param event_log_two: The event log. + :param str activity_name: The name of the activity to validate. + :param str case_id: The case ID to validate. + :returns: None + """ + if not _has_all_required_columns(event_log_one) or not _has_all_required_columns(event_log_two): + raise ValueError(f"The event logs are missing required columns. {BUG_REPORT_CTA}") + if not _logs_contain_at_most_one_case_id(event_log_one, event_log_two): + raise ValueError(f"The event logs must contain exactly one case ID. {BUG_REPORT_CTA}") + if not _logs_contain_at_most_one_activity(event_log_one, event_log_two): + raise ValueError(f"The event logs must contain exactly one activity type. {BUG_REPORT_CTA}") + + +def _case_level_metric_pre_check(event_log_one: sf.FrameHE, + event_log_two: sf.FrameHE) -> None: + """Check if the event logs are in a valid format for case level metrics. + + :param event_log_one: The event log. + :param event_log_two: The event log. 
+ :returns: None + """ + if not _has_all_required_columns(event_log_one) or not _has_all_required_columns(event_log_two): + raise ValueError(f"The event logs are missing required columns. {BUG_REPORT_CTA}") + if not _logs_contain_at_most_one_case_id(event_log_one, event_log_two): + raise ValueError(f"The event logs must contain exactly one case ID. {BUG_REPORT_CTA}") + + +def _determine_start_end_per_case(gt: sf.FrameHE, + det: sf.FrameHE) -> sf.SeriesHE[sf.Index[np.str_], Any]: + """Determine the start and end time per case. + + :param gt: The ground truth event log. + :param det: The detected event log. + :returns: A dictionary with the case ID as key and a tuple with the start and end time as value. + """ + start_end_dict = {} + for case_id in gt["case:concept:name"].unique(): + gt_case = gt.loc[gt["case:concept:name"] == case_id] + det_case = det.loc[det["case:concept:name"] == case_id] + start_time = min(gt_case["time:timestamp"].min(), det_case["time:timestamp"].min()) + end_time = max(gt_case["time:timestamp"].max(), det_case["time:timestamp"].max()) + start_end_dict[case_id] = (start_time, end_time) + return sf.SeriesHE.from_dict(start_end_dict) + + +# pylint: disable=too-many-arguments, too-many-locals +def _generate_activity_metric_list(gt: sf.FrameHE, + det: sf.FrameHE, + case_id: str, + activity_name: str, + start_end_per_case: sf.SeriesHE[sf.Index[np.str_], Any], + metric: Callable[[sf.FrameHE, + sf.FrameHE, + sf.SeriesHE, + float], Any], + sampling_freq: float = -1.0) -> List[Any]: + """ Generate a list of metrics for a given case and activity. + + :param gt: The ground truth log. + :param det: The detected log. + :param case_id: The case ID. + If "*" is passed, the metric will be calculated and averaged for all + case IDs. + :param activity_name: The name of the activity. + If "*" is passed, the metric will be calculated and averaged for all + activities. + :param start_end_per_case: The start and end times for each case. 
+ :param metric: The metric to calculate. + :param sampling_freq: The sampling frequency of the logs, in Hertz. + :returns: A list of metrics. + """ + if not _has_all_required_columns(gt) or not _has_all_required_columns(det): + raise ValueError("Logs must have columns case:concept:name, " + f"concept:name, lifecycle:transition, time:timestamp. {BUG_REPORT_CTA}") + if case_id == "*": + relevant_case_ids = list(set((list(gt["case:concept:name"].values) + + list(det["case:concept:name"].values)))) + elif case_id != "*": + relevant_case_ids = [case_id] + else: + raise ValueError(f"Case ID must be '*' or a valid case ID. {BUG_REPORT_CTA}") + metric_list = [] + for case_name in relevant_case_ids: + gt_filtered_by_case = gt.loc[gt["case:concept:name"] == case_name] + det_filtered_by_case = det.loc[det["case:concept:name"] == case_name] + if activity_name == "*": + relevant_activity_names = list( + set((list(gt_filtered_by_case["concept:name"].values) + + list(det_filtered_by_case["concept:name"].values)))) + elif activity_name != "*": + relevant_activity_names = [activity_name] + else: + raise ValueError("Activity name must be '*' or a valid activity name. " + f"{BUG_REPORT_CTA}") + for act_name in relevant_activity_names: + gt_filtered_by_activity = gt_filtered_by_case.loc[gt_filtered_by_case[ + "concept:name"] + == act_name] + det_filtered_by_activity = det_filtered_by_case.loc[det_filtered_by_case[ + "concept:name"] + == act_name] + if len(gt_filtered_by_activity) == 0 and len(det_filtered_by_activity) == 0: + continue + metric_list.append(metric(gt_filtered_by_activity, + det_filtered_by_activity, + start_end_per_case, + sampling_freq)) + return metric_list + + +def _generate_sequence_metric_list(gt: sf.FrameHE, + det: sf.FrameHE, + case_id: str, + metric: Callable[[sf.FrameHE, + sf.FrameHE], Any], ) -> List[Any]: + """ Generate a list of sequence metrics for a given case. + + :param gt: The ground truth log. + :param det: The detected log. 
+    :param case_id: The case ID to calculate the metric for.
+        If "*" is passed, the metric will be calculated and averaged for all
+        case IDs.
+    :param metric: The metric to calculate.
+    :returns: A list of metrics.
+    """
+    if not _has_all_required_columns(gt) or not _has_all_required_columns(det):
+        raise ValueError("Logs must have columns case:concept:name, "
+                         f"concept:name, lifecycle:transition, time:timestamp. {BUG_REPORT_CTA}")
+    if case_id == "*":
+        relevant_case_ids = list(set((list(gt["case:concept:name"].values)
+                                      + list(det["case:concept:name"].values))))
+    elif case_id != "*":
+        relevant_case_ids = [case_id]
+    else:
+        raise ValueError(f"Case ID must be '*' or a valid case ID. {BUG_REPORT_CTA}")
+    metric_list = []
+    for case_name in relevant_case_ids:
+        gt_filtered_by_case = gt.loc[gt["case:concept:name"] == case_name]
+        det_filtered_by_case = det.loc[det["case:concept:name"] == case_name]
+        if len(gt_filtered_by_case) == 0 and len(det_filtered_by_case) == 0:
+            continue
+        metric_list.append(metric(gt_filtered_by_case,
+                                  det_filtered_by_case))
+    return metric_list
+
+
+def _count_values(input_frame: sf.FrameHE, column: str) -> Dict[Any, int]:
+    """Count the occurrences of each unique value in a column of a frame.
+
+    :param input_frame: The input FrameHE.
+    :param column: The name of the column whose values are counted.
+    :returns: A dictionary with the unique values as keys and their counts as values.
+    """
+    # Iterate over the rows of the input FrameHE and count the values
+    # for the given column
+    value_counts = {}
+    for value in input_frame[column].values:
+        if value not in value_counts:
+            value_counts[value] = 1
+        else:
+            value_counts[value] += 1
+    return value_counts
+
+
+def _remove_events_with_length_zero(log: sf.FrameHE) -> sf.FrameHE:
+    """Remove events with a duration of zero.
+
+    :param log: The event log.
+    :returns: The event log without events with a duration of zero.
+ """ + case_activity_timestamp_combinations_to_remove = [] + for case_id in log["case:concept:name"].unique(): + case = log.loc[log["case:concept:name"] == case_id] + for activity_name in case["concept:name"].unique(): + activity = case.loc[case["concept:name"] == activity_name] + value_counts = _count_values(activity, "time:timestamp") + keys = [key for key, value in value_counts.items() if value > 1] + if len(keys) == 0: + continue + case_activity_timestamp_combinations_to_remove.extend( + [(case_id, activity_name, key) for key in keys]) + if len(case_activity_timestamp_combinations_to_remove) == 0: + return log + for case_id, activity_name, timestamp in case_activity_timestamp_combinations_to_remove: + log = log.loc[~((log["case:concept:name"] == case_id) + & (log["concept:name"] == activity_name) + & (log["time:timestamp"] == timestamp))] + return log + + +def _get_case_in_filtered_logs(gt: sf.FrameHE, det: sf.FrameHE) -> str: + """Get the case ID in the filtered logs. + + :param gt: The ground truth log. + :param det: The detected log. + :returns: None + """ + + if len(gt) > 0: + case_id = str(gt["case:concept:name"].iloc[0]) + elif len(det) > 0: + case_id = str(det["case:concept:name"].iloc[0]) + else: + raise ValueError("Both logs, gt and det, are empty. " + f"Cannot calculate EA metrics. 
{BUG_REPORT_CTA}") + return case_id diff --git a/package/aqudem/ward_helper.py b/package/aqudem/ward_helper.py new file mode 100644 index 0000000..558c298 --- /dev/null +++ b/package/aqudem/ward_helper.py @@ -0,0 +1,198 @@ +"""Helper functions for metrics from Ward et al; used in event analysis and 2SET.""" +from datetime import datetime +from functools import lru_cache +from typing import Union +import static_frame as sf +from numpy import nan +from .utils import BUG_REPORT_CTA + +EIGHT_TYPE_MAPPING_P_C_N = { + (nan, "FP", "TN"): "I", + (nan, "FP", "FN"): "I", + (nan, "FN", "TN"): "D", + (nan, "FN", "FP"): "D", + (nan, "FP", "TP"): "Oa", + (nan, "FN", "TP"): "Ua", + ("TP", "FP", "TP"): "M", + ("TP", "FN", "TP"): "F", + ("TN", "FP", "TP"): "Oa", + ("FN", "FP", "TP"): "Oa", + ("TN", "FN", "TP"): "Ua", + ("FP", "FN", "TP"): "Ua", + ("TN", "FP", "TN"): "I", + ("FN", "FP", "TN"): "I", + ("TN", "FP", "FN"): "I", + ("FN", "FP", "FN"): "I", + ("TN", "FN", "TN"): "D", + ("FP", "FN", "TN"): "D", + ("TN", "FN", "FP"): "D", + ("FP", "FN", "FP"): "D", + ("TP", "FP", "TN"): "Oo", + ("TP", "FP", "FN"): "Oo", + ("TP", "FN", "TN"): "Uo", + ("TP", "FN", "FP"): "Uo", + ("TN", "FP", nan): "I", + ("FN", "FP", nan): "I", + ("TN", "FN", nan): "D", + ("FP", "FN", nan): "D", + ("TP", "FP", nan): "Oo", + ("TP", "FN", nan): "Uo" +} + +FOUR_TYPE_MAPPING_GT_DET = { + (True, True): "TP", + (False, False): "TN", + (False, True): "FP", + (True, False): "FN" +} + + +def _generate_eight_type(prev_type: Union[str, None], + segment_type: str, + next_type: Union[str, None]) -> str: + """Generate the eight type of the segment based on the + type of the segment and the previous and next segment. + + :param str segment_type: The type of the segment. + In ["TP", "TN", "I", "M", "O", "D", "F", "U"]. + :param str prev_type: The type of the previous segment. + In ["TP", "TN", "I", "M", "O", "D", "F", "U"]. + :param str next_type: The type of the next segment. + In ["TP", "TN", "I", "M", "O", "D", "F", "U"]. 
+    :return: The eight type of the segment.
+    :rtype: str
+    """
+    if segment_type not in ["TP", "TN", "FP", "FN"]:
+        raise ValueError("The segment type must be 'TP', 'TN', 'FP', or 'FN'.")
+    if prev_type not in ["TP", "TN", "FP", "FN", nan]:
+        raise ValueError("The previous segment type must be 'TP', 'TN', 'FP', 'FN', or None.")
+    if next_type not in ["TP", "TN", "FP", "FN", nan]:
+        raise ValueError("The next segment type must be 'TP', 'TN', 'FP', 'FN', or None.")
+    if segment_type in ["TP", "TN"]:
+        return segment_type
+    return EIGHT_TYPE_MAPPING_P_C_N[(prev_type, segment_type, next_type)]
+
+
+def _is_during_activity_exec(log: sf.FrameHE, timestamp: datetime) -> bool:
+    """Check if the timestamp is during an activity execution.
+
+    Assumes event log to be filtered for certain activity and case.
+    Relevant columns/format of the input FrameHE:
+    log:
+    - "lifecycle:transition": The lifecycle transition of the event.
+    - "time:timestamp": The timestamp of the event, start and complete for all
+      activity executions.
+    :param sf.FrameHE log: The event log.
+    :param datetime timestamp: The timestamp to check.
+    :returns: True if the timestamp lies within an activity execution, False otherwise.
+    """
+    # check if middle timestamp is during activity in ground truth and detected
+    # get the first value with a timestamp smaller than the middle timestamp
+    log_before_middle = log.loc[log["time:timestamp"] <= timestamp]
+    if len(log_before_middle) == 0:
+        return False
+    highest_before_middle = log_before_middle.iloc[-1]
+    if highest_before_middle["lifecycle:transition"] == "start":
+        return True
+    if highest_before_middle["lifecycle:transition"] == "complete":
+        return False
+    raise ValueError("The ground truth log is not in the correct format.")
+
+
+# pylint: disable=too-many-locals
+@lru_cache
+def _generate_segment_scores(ground_truth: sf.FrameHE,
+                             detected: sf.FrameHE,
+                             start_time: datetime,
+                             end_time: datetime) -> sf.FrameHE:
+    """Generate the segment scores for the ground truth and detected logs.
+ + Assumes event logs to be filtered for certain activity and case. + Relevant columns/format of the input DataFrames: + ground_truth: + - "lifecycle:transition": The lifecycle transition of the event. + - "time:timestamp": The timestamp of the event. + detected: + - "lifecycle:transition": The lifecycle transition of the event. + - "time:timestamp": The timestamp of the event. + Format of the generated DataFrame: + Columns: + - "segment_id": The segment ID. + - "start": The start timestamp of the segment. + - "end": The end timestamp of the segment. + - "type": The type of the segment. + In ["TP", "TN", "I", "M", "Oa", "Oo", "D", "F", "Ua", "Uo"]. + :param ground_truth: The ground truth log. + :param detected: The detected log. + :param start_time: The start time of the trace/case. + :param end_time: The end time of the trace/case. + """ + if len(ground_truth) == 0 and len(detected) == 0: + return sf.FrameHE(columns=["start", "end", "type"]) + # Filter the logs to only contain the start and complete events + ground_truth_filtered: sf.FrameHE = ground_truth.loc[ + ground_truth["lifecycle:transition"].isin(["start", "complete"])] # type: ignore + detected_filtered = detected.loc[detected["lifecycle:transition"].isin(["start", "complete"])] + # merge the logs and extract list so that + # every start and complete event are turned into a boundary + merged = sf.FrameHE.from_concat([ground_truth_filtered, detected_filtered], + index=sf.IndexAutoFactory, + axis=0) + merged = merged.sort_values("time:timestamp") + boundary_timestamps = list(set(list(merged["time:timestamp"].values))) + boundary_timestamps.sort() + if start_time < boundary_timestamps[0]: + boundary_timestamps.insert(0, start_time) + elif start_time > boundary_timestamps[0]: + raise ValueError(f"Internal value error regarding timing. 
{BUG_REPORT_CTA}") + if end_time > boundary_timestamps[-1]: + boundary_timestamps.append(end_time) + elif end_time < boundary_timestamps[-1]: + raise ValueError(f"Internal value error regarding timing. {BUG_REPORT_CTA}") + # create a new Dataframe with columns start, end, type + segment_scores_list = [] + # make sure that the boundary timestamps are in the correct order + if boundary_timestamps != sorted(boundary_timestamps): + raise ValueError(f"The boundary timestamps are not in the correct order. {BUG_REPORT_CTA}") + # iterate over the boundary timestamps and create the segments + current_type = "None" + for segment in range(len(boundary_timestamps) - 1): + start = boundary_timestamps[segment] + end = boundary_timestamps[segment + 1] + # get the middle timestamp of the segment + middle = start + (end - start) / 2 + gt_is_active = _is_during_activity_exec(ground_truth_filtered, middle) + det_is_active = _is_during_activity_exec(detected_filtered, middle) # type: ignore + segment_type = FOUR_TYPE_MAPPING_GT_DET[(gt_is_active, det_is_active)] + # append the segment to the segment_scores DataFrame + prior_type = current_type + current_type = segment_type + if prior_type == current_type: + raise ValueError(f"The current and prior segment types are the same. 
{BUG_REPORT_CTA}") + segment_scores_list.append({"start": start, "end": end, "type": segment_type}) + segment_scores_frame = sf.FrameHE.from_dict_records(segment_scores_list) + # sort the segment_scores DataFrame by the start timestamp + segment_scores_frame = segment_scores_frame.sort_values("start") + # extend the dataframe so that for every row there are new columns + # prev_type and next_type based on the type of the previous and next segment + segment_scores_frame = ( + sf.FrameHE.from_concat([segment_scores_frame, segment_scores_frame["type"].shift(1)], + axis=1, + columns=['start', 'end', 'type', 'prev_type'])) + segment_scores_frame = ( + sf.FrameHE.from_concat([segment_scores_frame, segment_scores_frame["type"].shift(-1)], + axis=1, + columns=['start', 'end', 'type', 'prev_type', 'next_type'])) + # generate new column eight_type based on the type + # of the segment and the previous and next segment + eight_types = ( + segment_scores_frame.iter_series(axis=1).apply( + lambda x: _generate_eight_type(x["prev_type"], x["type"], x["next_type"]))) + segment_scores_frame = ( + sf.FrameHE.from_concat([segment_scores_frame, eight_types], + axis=1, + columns=['start', 'end', 'type', 'prev_type', + 'next_type', 'eight_type'])) + # remove type, prev_type, and next_type columns + segment_scores_frame = segment_scores_frame.drop[["type", "prev_type", "next_type"]] + # rename the eight_type column to type + segment_scores_frame = segment_scores_frame.relabel(columns=['start', 'end', 'type']) + return segment_scores_frame diff --git a/package/code-check.sh b/package/code-check.sh new file mode 100755 index 0000000..5766ec5 --- /dev/null +++ b/package/code-check.sh @@ -0,0 +1,9 @@ +# Test and coverage +export PYTHONPATH=. 
+coverage run --source=aqudem --branch -m pytest +coverage report --fail-under=90 -m +# Lint +pylint aqudem +pylint --disable=protected-access,missing-function-docstring tests +# Type check +mypy aqudem tests --strict diff --git a/package/docs/Makefile b/package/docs/Makefile new file mode 100644 index 0000000..4cb0592 --- /dev/null +++ b/package/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -msphinx +SPHINXPROJ = aqudem +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/package/docs/authors.rst b/package/docs/authors.rst new file mode 100644 index 0000000..e122f91 --- /dev/null +++ b/package/docs/authors.rst @@ -0,0 +1 @@ +.. include:: ../AUTHORS.rst diff --git a/package/docs/conf.py b/package/docs/conf.py new file mode 100644 index 0000000..554c572 --- /dev/null +++ b/package/docs/conf.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# +# aqudem documentation build configuration file, created by +# sphinx-quickstart on Fri Jun 9 13:47:02 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another +# directory, add these directories to sys.path here. 
If the directory is +# relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +import aqudem + +# -- General configuration --------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'AquDeM' +copyright = "2024, Aaron F. Kurz" +author = "Aaron F. Kurz" + +# The version info for the project you're documenting, acts as replacement +# for |version| and |release|, also used in various other places throughout +# the built documents. +# +# The short X.Y version. +version = aqudem.__version__ +# The full version, including alpha/beta/rc tags. +release = aqudem.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a +# theme further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +# -- Options for HTMLHelp output --------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'aqudemdoc' + + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'aqudem.tex', + 'AquDeM Documentation', + 'Aaron F. Kurz', 'manual'), +] + + +# -- Options for manual page output ------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [ + (master_doc, 'aqudem', + 'AquDeM Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'aqudem', + 'AquDeM Documentation', + author, + 'aqudem', + 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/package/docs/contributing.rst b/package/docs/contributing.rst new file mode 100644 index 0000000..e582053 --- /dev/null +++ b/package/docs/contributing.rst @@ -0,0 +1 @@ +.. include:: ../CONTRIBUTING.rst diff --git a/package/docs/history.rst b/package/docs/history.rst new file mode 100644 index 0000000..2506499 --- /dev/null +++ b/package/docs/history.rst @@ -0,0 +1 @@ +.. include:: ../HISTORY.rst diff --git a/package/docs/index.rst b/package/docs/index.rst new file mode 100644 index 0000000..3d5e1cc --- /dev/null +++ b/package/docs/index.rst @@ -0,0 +1,20 @@ +Welcome to AquDeM's documentation! +====================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + readme + installation + usage + modules + contributing + authors + history + +Indices and tables +================== +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/package/docs/installation.rst b/package/docs/installation.rst new file mode 100644 index 0000000..c6417dd --- /dev/null +++ b/package/docs/installation.rst @@ -0,0 +1,51 @@ +.. highlight:: shell + +============ +Installation +============ + + +Stable release +-------------- + +To install AquDeM, run this command in your terminal: + +.. code-block:: console + + $ pip install aqudem + +This is the preferred method to install AquDeM, as it will always install the most recent stable release. + +If you don't have `pip`_ installed, this `Python installation guide`_ can guide +you through the process. + +.. 
_pip: https://pip.pypa.io +.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ + + +From sources +------------ + +The sources for AquDeM can be downloaded from the `Github repo`_. + +You can either clone the public repository: + +.. code-block:: console + + $ git clone git://github.com/aaronkurz/aqudem + +Or download the `tarball`_: + +.. code-block:: console + + $ curl -OJL https://github.com/aaronkurz/aqudem/tarball/master + +Once you have a copy of the source, you can install it with: + +.. code-block:: console + + $ python setup.py install + + +.. _Github repo: https://github.com/aaronkurz/aqudem +.. _tarball: https://github.com/aaronkurz/aqudem/tarball/master diff --git a/package/docs/make.bat b/package/docs/make.bat new file mode 100644 index 0000000..96f9b3e --- /dev/null +++ b/package/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=python -msphinx +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=aqudem + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The Sphinx module was not found. Make sure you have Sphinx installed, + echo.then set the SPHINXBUILD environment variable to point to the full + echo.path of the 'sphinx-build' executable. Alternatively you may add the + echo.Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/package/docs/readme.rst b/package/docs/readme.rst new file mode 100644 index 0000000..72a3355 --- /dev/null +++ b/package/docs/readme.rst @@ -0,0 +1 @@ +.. 
include:: ../README.rst diff --git a/package/docs/usage.rst b/package/docs/usage.rst new file mode 100644 index 0000000..f08f369 --- /dev/null +++ b/package/docs/usage.rst @@ -0,0 +1,7 @@ +===== +Usage +===== + +To use AquDeM in a project:: + + import aqudem diff --git a/package/requirements.txt b/package/requirements.txt new file mode 100644 index 0000000..cb332f6 --- /dev/null +++ b/package/requirements.txt @@ -0,0 +1,6 @@ +pm4py~=2.7.11.7 +textdistance~=4.6.1 +typing-extensions~=4.11.0 +numpy~=1.26.4 +matplotlib~=3.8.4 +static-frame~=2.6.0 diff --git a/package/requirements_dev.txt b/package/requirements_dev.txt new file mode 100644 index 0000000..8d623da --- /dev/null +++ b/package/requirements_dev.txt @@ -0,0 +1,13 @@ +pip==24 +bump2version==1.0.1 +wheel==0.42.0 +watchdog==4.0.0 +flake8==7.0.0 +tox==4.12.1 +coverage==7.4.1 +Sphinx==7.2.6 +twine==5.0.0 +pylint~=3.1.0 +pytest~=8.1.1 +setuptools~=69.1.0 +mypy==1.9.0 diff --git a/package/setup.cfg b/package/setup.cfg new file mode 100644 index 0000000..4816a39 --- /dev/null +++ b/package/setup.cfg @@ -0,0 +1,20 @@ +[bumpversion] +current_version = 0.1.0 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:aqudem/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs +[tool:pytest] +addopts = --ignore=setup.py diff --git a/package/setup.py b/package/setup.py new file mode 100644 index 0000000..cc8794c --- /dev/null +++ b/package/setup.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('README.rst', encoding="utf-8") as readme_file: + readme = readme_file.read() + +with open('HISTORY.rst', encoding="utf-8") as history_file: + history = history_file.read() + +requirements = ['pm4py~=2.7.11.7', + 'textdistance~=4.6.1', + 
'typing-extensions~=4.11.0', + 'numpy~=1.26.4', + 'matplotlib~=3.8.4', + 'static-frame~=2.6.0'] + +test_requirements = ['pytest>=3'] + +setup( + author="Aaron F. Kurz", + author_email='aaron.kurz@unisg.ch', + python_requires='>=3.6', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + description=("Activity and Sequence Detection Performance Measures: A package to evaluate" + " activity detection results, including the sequence of events given multiple" + " activity types."), + install_requires=requirements, + license='GNU General Public License v3 or later (GPLv3+)', + long_description=readme + '\n\n' + history, + include_package_data=True, + keywords='aqudem', + name='aqudem', + packages=find_packages(include=['aqudem', 'aqudem.*']), + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/ics-unisg/aqudem', + version='0.1.0', + zip_safe=False, +) diff --git a/package/tests/__init__.py b/package/tests/__init__.py new file mode 100644 index 0000000..b7abcbf --- /dev/null +++ b/package/tests/__init__.py @@ -0,0 +1 @@ +"""Unit test package for aqudem.""" diff --git a/package/tests/mocks/logs.py b/package/tests/mocks/logs.py new file mode 100644 index 0000000..558e9a4 --- /dev/null +++ b/package/tests/mocks/logs.py @@ -0,0 +1,220 @@ +from datetime import datetime +import static_frame as sf + +ground_truth_ten_eleven = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 5), datetime(2021, 1, 1, 10, 25), + datetime(2021, 1, 1, 10, 35), datetime(2021, 1, 1, 
10, 38), + datetime(2021, 1, 1, 10, 45), datetime(2021, 1, 1, 10, 55)], +}) +detected_ten_eleven = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 10, 10), + datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 10, 20), + datetime(2021, 1, 1, 10, 30), datetime(2021, 1, 1, 10, 40), + datetime(2021, 1, 1, 10, 41), datetime(2021, 1, 1, 10, 42), + datetime(2021, 1, 1, 10, 50), datetime(2021, 1, 1, 11, 0)], +}) + + +ground_truth_ten_eighteen = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 12, 0), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 15, 0), + datetime(2021, 1, 1, 16, 0), datetime(2021, 1, 1, 18, 0)] +}) +detected_ten_eighteen = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 11, 30), datetime(2021, 1, 1, 12, 30), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 14, 0), + datetime(2021, 1, 1, 14, 30), datetime(2021, 1, 1, 15, 30), + datetime(2021, 1, 1, 15, 45), datetime(2021, 1, 1, 17, 0)] +}) + +ground_truth_mult_act_t_e = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1"], + "concept:name": ["A", 
"A", "A", "A", "A", "A", "B", "B", "B", "B"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", "start", "complete", + "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 12, 0), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 15, 0), + datetime(2021, 1, 1, 16, 0), datetime(2021, 1, 1, 18, 0), + datetime(2021, 1, 2, 10, 0), datetime(2021, 1, 2, 12, 0), + datetime(2021, 1, 2, 13, 0), datetime(2021, 1, 2, 15, 0)] +}) +detected_mult_act_t_e = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 11, 30), datetime(2021, 1, 1, 12, 30), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 14, 0), + datetime(2021, 1, 1, 14, 30), datetime(2021, 1, 1, 15, 30), + datetime(2021, 1, 1, 15, 45), datetime(2021, 1, 1, 17, 0), + datetime(2021, 1, 2, 10, 15), datetime(2021, 1, 2, 11, 0), + datetime(2021, 1, 2, 11, 30), datetime(2021, 1, 2, 12, 30)] +}) + +ground_truth_mixed_activity = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 5), datetime(2021, 1, 1, 10, 25), + datetime(2021, 1, 1, 10, 35), datetime(2021, 1, 1, 10, 38), + datetime(2021, 1, 1, 10, 45), datetime(2021, 1, 1, 10, 55), + datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 
12, 0), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 15, 0), + datetime(2021, 1, 1, 16, 0), datetime(2021, 1, 1, 18, 0)], +}) +detected_mixed_activity = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", + "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"], + "concept:name": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", + "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 10, 10), + datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 10, 20), + datetime(2021, 1, 1, 10, 30), datetime(2021, 1, 1, 10, 40), + datetime(2021, 1, 1, 10, 41), datetime(2021, 1, 1, 10, 42), + datetime(2021, 1, 1, 10, 50), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 11, 30), datetime(2021, 1, 1, 12, 30), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 14, 0), + datetime(2021, 1, 1, 14, 30), datetime(2021, 1, 1, 15, 30), + datetime(2021, 1, 1, 15, 45), datetime(2021, 1, 1, 17, 0)], +}) + + +ground_truth_mixed_case = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2"], + "concept:name": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 5), datetime(2021, 1, 1, 10, 25), + datetime(2021, 1, 1, 10, 35), datetime(2021, 1, 1, 10, 38), + datetime(2021, 1, 1, 10, 45), datetime(2021, 1, 1, 10, 55), + datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 12, 0), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 
1, 15, 0), + datetime(2021, 1, 1, 16, 0), datetime(2021, 1, 1, 18, 0)], +}) +detected_mixed_case = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1", "1", "1", "1", "1", "1", "1", + "2", "2", "2", "2", "2", "2", "2", "2", "2", "2"], + "concept:name": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A", + "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 10, 10), + datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 10, 20), + datetime(2021, 1, 1, 10, 30), datetime(2021, 1, 1, 10, 40), + datetime(2021, 1, 1, 10, 41), datetime(2021, 1, 1, 10, 42), + datetime(2021, 1, 1, 10, 50), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 11, 30), datetime(2021, 1, 1, 12, 30), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 14, 0), + datetime(2021, 1, 1, 14, 30), datetime(2021, 1, 1, 15, 30), + datetime(2021, 1, 1, 15, 45), datetime(2021, 1, 1, 17, 0)], +}) + + +_start_end_ten_eleven = {"1": (datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 1, 11, 0))} +start_end_series_ten_eleven = sf.SeriesHE.from_dict(_start_end_ten_eleven) + + +_start_end_ten_eighteen = {"1": (datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 1, 18, 0))} +start_end_series_ten_eighteen = sf.SeriesHE.from_dict(_start_end_ten_eighteen) + +_start_end_mult_act_t_e = {"1": (datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 2, 15, 0))} +start_end_series_mult_act_t_e = sf.SeriesHE.from_dict(_start_end_mult_act_t_e) + + +_start_end_mixed_activity = {"1": (datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 1, 18, 0))} +start_end_series_mixed_activity = sf.SeriesHE.from_dict(_start_end_mixed_activity) + +_start_end_mixed_case = {"1": 
(datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 1, 11, 0)), + "2": (datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 1, 18, 0))} +start_end_series_mixed_case = sf.SeriesHE.from_dict(_start_end_mixed_case) + +basic_mock_gt = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1"], + "concept:name": ["TestAct", "TestAct", "TestAct", "TestAct"], + "lifecycle:transition": ["start", "complete", "start", "complete"], + "time:timestamp": [datetime(2023, 12, 13, 10, 0, 0), datetime(2023, 12, 13, 10, 0, 5), + datetime(2023, 12, 13, 10, 0, 8), datetime(2023, 12, 13, 10, 0, 9)], +}) + +basic_mock_det = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1"], + "concept:name": ["TestAct", "TestAct", "TestAct", "TestAct"], + "lifecycle:transition": ["start", "complete", "start", "complete"], + "time:timestamp": [datetime(2023, 12, 13, 10, 0, 3), datetime(2023, 12, 13, 10, 0, 7), + datetime(2023, 12, 13, 10, 0, 9), datetime(2023, 12, 13, 10, 0, 10)], +}) + +start_end_basic = {"1": (datetime(2023, 12, 13, 10, 0, 0), + datetime(2023, 12, 13, 10, 0, 10))} +start_end_series_basic = sf.SeriesHE.from_dict(start_end_basic) +start_end_series_basic_only_det = sf.SeriesHE.from_dict({"1": (datetime(2023, 12, 13, 10, 0, 3), + datetime(2023, 12, 13, 10, 0, 10))}) + + +basic_mock_gt_1 = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1"], + "concept:name": ["TestAct", "TestAct"], + "lifecycle:transition": ["start", "complete"], + "time:timestamp": [datetime(2023, 12, 13, 10, 0, 0), datetime(2023, 12, 13, 10, 0, 5)], +}) + +basic_mock_det_1 = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1"], + "concept:name": ["TestAct", "TestAct"], + "lifecycle:transition": ["start", "complete"], + "time:timestamp": [datetime(2023, 12, 13, 10, 0, 5), datetime(2023, 12, 13, 10, 0, 10)], +}) + +start_end_basic_1 = {"1": (datetime(2023, 12, 13, 10, 0, 0), + datetime(2023, 12, 13, 10, 0, 10))} +start_end_series_basic_1 = sf.SeriesHE.from_dict(start_end_basic_1) 
+start_end_series_basic_1_only_det = sf.SeriesHE.from_dict({"1": (datetime(2023, 12, 13, 10, 0, 5), + datetime(2023, 12, 13, 10, 0, 10))}) +start_end_series_basic_1_only_gt = sf.SeriesHE.from_dict({"1": (datetime(2023, 12, 13, 10, 0, 0), + datetime(2023, 12, 13, 10, 0, 5))}) + +basic_mock_gt_2 = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1"], + "concept:name": ["TestAct", "TestAct", "TestAct", "TestAct"], + "lifecycle:transition": ["start", "complete", "start", "complete"], + "time:timestamp": [datetime(2023, 12, 13, 10, 0, 8), datetime(2023, 12, 13, 10, 0, 9), + datetime(2023, 12, 13, 10, 0, 0), datetime(2023, 12, 13, 10, 0, 5)], +}) + +basic_mock_det_2 = sf.FrameHE.from_dict({ + "case:concept:name": ["1", "1", "1", "1"], + "concept:name": ["TestAct", "TestAct", "TestAct", "TestAct"], + "lifecycle:transition": ["start", "complete", "start", "complete"], + "time:timestamp": [datetime(2023, 12, 13, 10, 0, 9), datetime(2023, 12, 13, 10, 0, 10), + datetime(2023, 12, 13, 10, 0, 3), datetime(2023, 12, 13, 10, 0, 7)], +}) + +start_end_basic_2 = {"1": (datetime(2023, 12, 13, 10, 0, 0), + datetime(2023, 12, 13, 10, 0, 10))} +start_end_series_basic_2 = sf.SeriesHE.from_dict(start_end_basic_2) diff --git a/package/tests/resources/23-03-20_det_firstlastlowlevel.xes b/package/tests/resources/23-03-20_det_firstlastlowlevel.xes new file mode 100644 index 0000000..954311f --- /dev/null +++ b/package/tests/resources/23-03-20_det_firstlastlowlevel.xes @@ -0,0 +1,401 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/23-03-20_det_firstlastlowlevel_ooo.xes b/package/tests/resources/23-03-20_det_firstlastlowlevel_ooo.xes new file mode 100644 index 0000000..b372bfa --- /dev/null +++ b/package/tests/resources/23-03-20_det_firstlastlowlevel_ooo.xes @@ -0,0 +1,401 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/23-03-20_gt_cam.xes b/package/tests/resources/23-03-20_gt_cam.xes new file mode 100644 index 0000000..21f7c56 --- /dev/null +++ b/package/tests/resources/23-03-20_gt_cam.xes @@ -0,0 +1,1235 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/23-03-20_gt_cam_ooo.xes b/package/tests/resources/23-03-20_gt_cam_ooo.xes new file mode 100644 index 0000000..5d5c886 --- /dev/null +++ b/package/tests/resources/23-03-20_gt_cam_ooo.xes @@ -0,0 +1,1235 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected.xes b/package/tests/resources/detected.xes new file mode 100644 index 0000000..c165260 --- /dev/null +++ b/package/tests/resources/detected.xes @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_missingactivityname.xes b/package/tests/resources/detected_missingactivityname.xes new file mode 100644 index 0000000..30f1d85 --- /dev/null +++ b/package/tests/resources/detected_missingactivityname.xes @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_missinginstance.xes b/package/tests/resources/detected_missinginstance.xes new file mode 100644 index 0000000..a88203d --- /dev/null +++ b/package/tests/resources/detected_missinginstance.xes @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_missinglifecycle.xes b/package/tests/resources/detected_missinglifecycle.xes new file mode 100644 index 0000000..7952230 --- /dev/null +++ b/package/tests/resources/detected_missinglifecycle.xes @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_missingsamplingfreq.xes b/package/tests/resources/detected_missingsamplingfreq.xes new file mode 100644 index 0000000..92d4839 --- /dev/null +++ b/package/tests/resources/detected_missingsamplingfreq.xes @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_missingtimestamp.xes b/package/tests/resources/detected_missingtimestamp.xes new file mode 100644 index 0000000..48dad72 --- /dev/null +++ b/package/tests/resources/detected_missingtimestamp.xes @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_missingtracename.xes b/package/tests/resources/detected_missingtracename.xes new file mode 100644 index 0000000..fa056e7 --- /dev/null +++ b/package/tests/resources/detected_missingtracename.xes @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_multipletraces.xes b/package/tests/resources/detected_multipletraces.xes new file mode 100644 index 0000000..92899e6 --- /dev/null +++ b/package/tests/resources/detected_multipletraces.xes @@ -0,0 +1,143 @@ + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_toomanycompletelifecycle.xes b/package/tests/resources/detected_toomanycompletelifecycle.xes new file mode 100644 index 0000000..5dca268 --- /dev/null +++ b/package/tests/resources/detected_toomanycompletelifecycle.xes @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/detected_wronglifecycle.xes b/package/tests/resources/detected_wronglifecycle.xes new file mode 100644 index 0000000..8f6ffe8 --- /dev/null +++ b/package/tests/resources/detected_wronglifecycle.xes @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth.xes b/package/tests/resources/ground_truth.xes new file mode 100644 index 0000000..09d04c4 --- /dev/null +++ b/package/tests/resources/ground_truth.xes @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_missingactivityname.xes b/package/tests/resources/ground_truth_missingactivityname.xes new file mode 100644 index 0000000..3167d14 --- /dev/null +++ b/package/tests/resources/ground_truth_missingactivityname.xes @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_missinginstance.xes b/package/tests/resources/ground_truth_missinginstance.xes new file mode 100644 index 0000000..6f9f350 --- /dev/null +++ b/package/tests/resources/ground_truth_missinginstance.xes @@ -0,0 +1,91 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_missinglifecycle.xes b/package/tests/resources/ground_truth_missinglifecycle.xes new file mode 100644 index 0000000..0577c6f --- /dev/null +++ b/package/tests/resources/ground_truth_missinglifecycle.xes @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_missingsamplingfreq.xes b/package/tests/resources/ground_truth_missingsamplingfreq.xes new file mode 100644 index 0000000..b063e93 --- /dev/null +++ b/package/tests/resources/ground_truth_missingsamplingfreq.xes @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_missingtimestamp.xes b/package/tests/resources/ground_truth_missingtimestamp.xes new file mode 100644 index 0000000..18c8a10 --- /dev/null +++ b/package/tests/resources/ground_truth_missingtimestamp.xes @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_missingtracename.xes 
b/package/tests/resources/ground_truth_missingtracename.xes new file mode 100644 index 0000000..206535c --- /dev/null +++ b/package/tests/resources/ground_truth_missingtracename.xes @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_multipletraces.xes b/package/tests/resources/ground_truth_multipletraces.xes new file mode 100644 index 0000000..9039585 --- /dev/null +++ b/package/tests/resources/ground_truth_multipletraces.xes @@ -0,0 +1,167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_toomanycompletelifecycle.xes b/package/tests/resources/ground_truth_toomanycompletelifecycle.xes new file mode 100644 index 0000000..58b605c --- /dev/null +++ b/package/tests/resources/ground_truth_toomanycompletelifecycle.xes @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/resources/ground_truth_wronglifecycle.xes b/package/tests/resources/ground_truth_wronglifecycle.xes new file mode 100644 index 0000000..e428da0 --- /dev/null +++ b/package/tests/resources/ground_truth_wronglifecycle.xes @@ -0,0 +1,91 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/package/tests/test_aqudem.py b/package/tests/test_aqudem.py new file mode 
100644 index 0000000..9f77173 --- /dev/null +++ b/package/tests/test_aqudem.py @@ -0,0 +1,63 @@ +"""Tests for `aqudem` package.""" +import os +import aqudem + + +def test_context_creation() -> None: + """Test the creation of a context object.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + for value in context._ground_truth["concept:name"].values: + assert value in ["Activity A", "Activity B", "Activity C"] + for value in context._detected["concept:name"].values: + assert value in ["Activity A", "Activity B", "Activity C"] + for value in context._detected["case:sampling_freq"].values: + assert value == 1.0 + for value in context._ground_truth["case:sampling_freq"].values: + assert value == 1.0 + assert context._ground_truth["case:concept:name"].count(unique=True) == 2 + assert context._detected["case:concept:name"].count(unique=True) == 2 + assert len(context._ground_truth.loc[ + context._ground_truth["lifecycle:transition"] == "complete"]) == 9 + assert len(context._ground_truth.loc[ + context._ground_truth["lifecycle:transition"] == "start"]) == 9 + assert len(context._detected.loc[ + context._detected["lifecycle:transition"] == "complete"]) == 8 + assert len(context._detected.loc[ + context._detected["lifecycle:transition"] == "start"]) == 8 + + +def test_context_creation_multiple_traces() -> None: + """Test the creation of a context object.""" + aqudem.Context(os.path.join("tests", "resources", "ground_truth_multipletraces.xes"), + os.path.join("tests", "resources", "detected_multipletraces.xes")) + + +def test_get_activity_names() -> None: + """Test the get_activity_names method.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + act_names = context.activity_names + assert "Activity A" in act_names["ground_truth"] + assert "Activity B" in act_names["ground_truth"] + assert 
"Activity C" in act_names["ground_truth"] + assert "Activity A" in act_names["detected"] + assert "Activity B" in act_names["detected"] + assert "Activity C" in act_names["detected"] + assert len(act_names["ground_truth"]) == 3 + assert len(act_names["detected"]) == 3 + + +def test_get_case_ids() -> None: + """Test the get_case_ids method.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth_multipletraces.xes"), + os.path.join("tests", "resources", "detected_multipletraces.xes")) + case_ids = context.case_ids + assert isinstance(case_ids, dict) + assert ["ground_truth", "detected"] == list(case_ids.keys()) + assert "ExampleTrace" in case_ids["ground_truth"] + assert "ExampleTrace2" in case_ids["ground_truth"] + assert "ExampleTrace" in case_ids["detected"] + assert "ExampleTrace2" in case_ids["detected"] + assert len(case_ids["ground_truth"]) == 2 + assert len(case_ids["detected"]) == 2 diff --git a/package/tests/test_cross_correlation.py b/package/tests/test_cross_correlation.py new file mode 100644 index 0000000..0b7a1ab --- /dev/null +++ b/package/tests/test_cross_correlation.py @@ -0,0 +1,174 @@ +""" Tests for the cross correlation functionality of the aqudem package.""" +import os +from unittest.mock import patch, MagicMock +import static_frame +import aqudem +from aqudem.cross_correlation_helper import _cross_correlation, _get_timeseries_format +from .mocks.logs import (basic_mock_gt, basic_mock_det, + start_end_series_basic, + start_end_series_basic_only_det, + basic_mock_gt_1, basic_mock_det_1, + start_end_series_basic_1, + start_end_series_basic_1_only_det, + start_end_series_basic_1_only_gt, + basic_mock_gt_2, basic_mock_det_2, + start_end_series_basic_2,) + +EMPTY_FRAME_HE = static_frame.FrameHE.from_dict({ + "case:concept:name": [], + "time:timestamp": [], + "lifecycle:transition": [], +}) + +def test_get_timeseries_format_basic() -> None: + res = _get_timeseries_format(basic_mock_gt, + basic_mock_det, + 1, + 
start_end_series_basic) + assert res[0] == [1, 1, 1, 1, 1, 1, -1, -1, 1, 1, -1] + assert res[1] == [-1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1] + + res = _get_timeseries_format(basic_mock_gt_1, + basic_mock_det_1, + 1, + start_end_series_basic_1) + assert res[0] == [1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1] + assert res[1] == [-1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1] + + res = _get_timeseries_format(basic_mock_gt_2, + basic_mock_det_2, + 1, + start_end_series_basic_2) + assert res[0] == [1, 1, 1, 1, 1, 1, -1, -1, 1, 1, -1] + assert res[1] == [-1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1] + +def test_get_timeseries_format_basic_2hz() -> None: + res = _get_timeseries_format(basic_mock_gt, + basic_mock_det, + 2, + start_end_series_basic) + assert res[0] == [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, 1, 1, 1, -1, -1] + assert res[1] == [-1, -1, -1, -1, -1, -1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1] + +def test_cc_basic() -> None: + cc = _cross_correlation(basic_mock_gt, + basic_mock_det, + start_end_series_basic, + 1) + assert cc == (0.45, -0.2) + + +def test_cc_same_seq_returns_1_0() -> None: + cc = _cross_correlation(basic_mock_det, + basic_mock_det, + start_end_series_basic_only_det, + 1) + assert cc == (1, 0.0) + + +def test_correct_sign_shift() -> None: + cc = _cross_correlation(basic_mock_gt_1, + basic_mock_det_1, + start_end_series_basic_1, + 1) + assert cc == (0.55, -0.5) + + +def test_cc_none() -> None: + """ Check that empty activity list leads to (None, None) return """ + cc1 = _cross_correlation(EMPTY_FRAME_HE, + basic_mock_det_1, + start_end_series_basic_1_only_det, + 1) + cc2 = _cross_correlation(basic_mock_gt_1, + EMPTY_FRAME_HE, + start_end_series_basic_1_only_gt, + 1) + cc3 = _cross_correlation(EMPTY_FRAME_HE, + EMPTY_FRAME_HE, + static_frame.SeriesHE.from_dict({}), + 1) + assert cc1 == (0.0, 0.0) + assert cc2 == (0.0, 0.0) + assert cc3 == (0.0, 0.0) + + +def test_cc_out_of_order() -> None: + """ Basic check with out-of-order lists """ + cc = 
_cross_correlation(basic_mock_gt_2, + basic_mock_det_2, + start_end_series_basic_2, + 1) + assert cc == (0.45, -0.2) + + +@patch('aqudem.cross_correlation_helper._get_timeseries_format') +def test_cc_correct_behavior_one_activity(mock_get_timeseries_format: MagicMock) -> None: + # Note that the function inputs are not used in the function, + # because the mock is used to control the return value + mock_get_timeseries_format.return_value = ([-1, -1, 1, -1, -1], [-1, -1, 1, -1, -1]) + cc = _cross_correlation(basic_mock_gt, + basic_mock_det, + start_end_series_basic, + 1) + assert cc == (1.0, 0.0) + mock_get_timeseries_format.return_value = ([-1, -1, 1, 1, 1], [-1, -1, 1, 1, 1]) + cc = _cross_correlation(basic_mock_gt, + basic_mock_det, + start_end_series_basic, + 1) + assert cc == (1.0, 0.0) + mock_get_timeseries_format.return_value = ([-1, -1, -1, -1, -1], [-1, -1, 1, -1, -1]) + cc = _cross_correlation(basic_mock_gt, + basic_mock_det, + start_end_series_basic, + 1) + assert cc == (0.6, 0.0) + + +@patch('aqudem.cross_correlation_helper._get_timeseries_format') +def test_cc_correct_shift_behavior(mock_get_timeseries_format: MagicMock) -> None: + # Note that the function inputs are not used in the function, + # because the mock is used to control the return value + mock_get_timeseries_format.return_value = ([-1, -1, 1, -1, -1], [-1, -1, -1, 1, -1]) + cc = _cross_correlation(basic_mock_gt, + basic_mock_det, + start_end_series_basic, + 1) + assert cc == (0.8, -0.25) + + +def test_context_cross_correlation() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.cross_correlation(activity_name="Activity A", case_id="ExampleTrace1") + assert res == (0.73, 0.0) + + +def test_context_cross_correlation_by_activity() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = 
context.cross_correlation(activity_name="Activity A") + assert res == (0.64, 0.225) + res = context.cross_correlation(activity_name="Activity B") + assert res == (0.795, 0.165) + res = context.cross_correlation(activity_name="Activity C") + assert res == (0.83, -0.03) + + +def test_context_cross_correlation_by_case() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.cross_correlation(case_id="ExampleTrace1") + assert res == (0.8267, -0.01) + res = context.cross_correlation(case_id="ExampleTrace2") + assert res == (0.61, 0.39) + + +def test_context_cross_correlation_by_case_and_activity() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.cross_correlation() + assert res == (0.74, 0.15) diff --git a/package/tests/test_damerau_levenshtein.py b/package/tests/test_damerau_levenshtein.py new file mode 100644 index 0000000..0a02d90 --- /dev/null +++ b/package/tests/test_damerau_levenshtein.py @@ -0,0 +1,262 @@ +"""Test damerau levenshtein dist norm that is exposed in the context class and helper functions.""" +import os +from datetime import datetime +import pytest +import static_frame as sf +import aqudem +from aqudem.damerau_levenshtein_helper import (_map_strings_to_letters, + _damerau_opt_levenshtein_dist, + _levenshtein_distancy_by_case, + _damerau_levenshtein_distancy_by_case) + + +def test_basic_map_strings() -> None: + res1, res2 = _map_strings_to_letters(["i", "think", "i", "am", "think", "think", "am", "i"], + ["therefore", "therefore", "am", + "i", "i", "am", "therefore", "am"]) + assert res1 == "ABACBBCA" + assert res2 == "DDCAACDC" + + +def test_different_lengths_map_strings() -> None: + res1, res2 = _map_strings_to_letters(["i", "think", "i", "am"], + ["therefore", "therefore", "am", "i", + "i", "am", "therefore", "am"]) + assert res1 == 
"ABAC" + assert res2 == "DDCAACDC" + + +def test_empty_lengths_map_strings() -> None: + res1, res2 = _map_strings_to_letters([], + ["therefore", "therefore", "am", "i", + "i", "am", "therefore", "am"]) + assert res1 == "" + assert res2 == "AABCCBAB" + res1, res2 = _map_strings_to_letters(["i", "think", "i", "am"], + []) + assert res1 == "ABAC" + assert res2 == "" + + +def test_basic_damerau_opt_levenshtein_dist_norm() -> None: + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace", "ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "complete", "start", "complete"], + "concept:name": ["A", "A", "B", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0), + datetime(2021, 1, 1, 0, 15, 0), + datetime(2021, 1, 1, 0, 20, 0)] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace", "ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "complete", "start", "complete"], + "concept:name": ["A", "A", "B", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0), + datetime(2021, 1, 1, 0, 15, 0), + datetime(2021, 1, 1, 0, 20, 0)] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 0.0 + + +def test_swap_damerau_opt_levenshtein_dist_norm() -> None: + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["B", "A"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 0.5 + + +def test_change_damerau_opt_levenshtein_dist_norm() -> None: + gt = 
sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["B", "A"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 0.5 + res = _damerau_opt_levenshtein_dist(gt, det, metr_type="lev") + assert res[1] == 1.0 + + +def test_change_damerau_opt_levenshtein_dist() -> None: + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["B", "A"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[0] == 1 + res = _damerau_opt_levenshtein_dist(gt, det, metr_type="lev") + assert res[0] == 2 + + +def test_change_damerau_opt_levenshtein() -> None: + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace", "ExampleTrace2", + "ExampleTrace2", "ExampleTrace2", "ExampleTrace2"], + "lifecycle:transition": ["start", "start", "start", "start", "start", "start"], + "concept:name": ["A", "B", "A", "B", "A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0), + datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0), + datetime(2021, 1, 1, 0, 20, 0), + datetime(2021, 1, 1, 0, 30, 0)] + }) + det = sf.FrameHE.from_dict({ + 
"case:concept:name": ["ExampleTrace", "ExampleTrace", "ExampleTrace2", + "ExampleTrace2", "ExampleTrace2", "ExampleTrace2"], + "lifecycle:transition": ["start", "start", "start", "start", "start", "start"], + "concept:name": ["B", "A", "B", "A", "C", "C"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0), + datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0), + datetime(2021, 1, 1, 0, 20, 0), + datetime(2021, 1, 1, 0, 30, 0)] + }) + res = _damerau_levenshtein_distancy_by_case(gt, det, case_id="ExampleTrace") + assert res == (1, 0.5) + res = _damerau_levenshtein_distancy_by_case(gt, det, case_id="ExampleTrace2") + assert res == (3, 0.75) + res = _damerau_levenshtein_distancy_by_case(gt, det, case_id="*") + assert res == (2, 0.625) + res = _levenshtein_distancy_by_case(gt, det, case_id="ExampleTrace") + assert res == (2, 1.0) + res = _levenshtein_distancy_by_case(gt, det, case_id="ExampleTrace2") + assert res == (3, 0.75) + res = _levenshtein_distancy_by_case(gt, det, case_id="*") + assert res == (2.5, 0.875) + + +def test_change_all_damerau_opt_levenshtein_dist_norm() -> None: + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["C", "C"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 1.0 + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + det = 
sf.FrameHE.from_dict({ + "case:concept:name": [], + "lifecycle:transition": [], + "concept:name": [], + "time:timestamp": [] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 1.0 + + +def test_empty_damerau_opt_levenshtein_dist_norm() -> None: + gt = sf.FrameHE.from_dict({ + "case:concept:name": ["ExampleTrace", "ExampleTrace"], + "lifecycle:transition": ["start", "start"], + "concept:name": ["A", "B"], + "time:timestamp": [datetime(2021, 1, 1, 0, 0, 0), + datetime(2021, 1, 1, 0, 10, 0)] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": [], + "lifecycle:transition": [], + "concept:name": [], + "time:timestamp": [] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 1.0 + gt = sf.FrameHE.from_dict({ + "case:concept:name": [], + "lifecycle:transition": [], + "concept:name": [], + "time:timestamp": [] + }) + det = sf.FrameHE.from_dict({ + "case:concept:name": [], + "lifecycle:transition": [], + "concept:name": [], + "time:timestamp": [] + }) + res = _damerau_opt_levenshtein_dist(gt, det) + assert res[1] == 0.0 + + +def test_context_damerau_levenshtein() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.damerau_levenshtein_distance(case_id="ExampleTrace1") + assert res == (2, 0.3333) + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.damerau_levenshtein_distance(case_id="ExampleTrace2") + assert res == (1, 0.3333) + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.damerau_levenshtein_distance() + assert res == (1.5, 0.3333) + + +def test_context_levenshtein() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = 
context.levenshtein_distance(case_id="ExampleTrace2") + assert res == (2, 0.6667) + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.levenshtein_distance(case_id="ExampleTrace1") + assert res == (2, 0.3333) + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.levenshtein_distance() + assert res == (2.0, 0.5) + + +def test_wrong_metr_type() -> None: + with pytest.raises(ValueError): + _damerau_opt_levenshtein_dist(sf.FrameHE(), sf.FrameHE(), metr_type="wrong") diff --git a/package/tests/test_e2e.py b/package/tests/test_e2e.py new file mode 100644 index 0000000..8ee868f --- /dev/null +++ b/package/tests/test_e2e.py @@ -0,0 +1,257 @@ +""" Testing the aqudem package end-to-end. """ +import os +import pytest +import static_frame as sf +import aqudem + +ACTIVIES_GT = ['OV Burn', 'Sorting Machine sort', 'WT Transport', 'Calibrate VGR', + 'Get Workpiece from Pickup Station', 'Pickup and transport to sink', + 'Pickup and transport to Oven', 'Unload from HBW', 'Store Workpiece in HBW', + 'Calibrate HBW', 'Start Milling Machine', 'Move to DPS'] +ACTIVITIES_DET = ['WT Transport', 'Get Workpiece from Pickup Station', + 'Store Workpiece in HBW', 'Calibrate HBW', 'Read Color', 'Move to DPS'] +CASES_GT = ['case1'] +CASES_DET = ['case1'] +act_only_in_one_log = set(ACTIVIES_GT) ^ set(ACTIVITIES_DET) +case_only_in_one_log = set(CASES_GT) ^ set(CASES_DET) + + +def _validate_two_set(two_set: aqudem.TwoSet) -> None: + assert isinstance(two_set, aqudem.TwoSet) + for key in ["tp", "tn", "d", "f", "ua", "uo", "i", "m", "oa", "oo", + "p", "n", "t", + "tpr", "tnr", "dr", "fr", "uar", "uor", "ir", "mr", "oar", "oor"]: + assert isinstance(getattr(two_set, key), (int, float)) + assert two_set.tp + two_set.d + two_set.f + two_set.ua + two_set.uo == two_set.p + assert two_set.tn + two_set.i + 
two_set.m + two_set.oa + two_set.oo == two_set.n + assert round(two_set.p + two_set.n, 2) == round(two_set.t, 2) + assert round(two_set.tpr + two_set.dr + two_set.fr + two_set.uar + two_set.uor, 4) - 1 < 0.001 + assert round(two_set.tnr + two_set.ir + two_set.mr + two_set.oar + two_set.oor, 4) - 1 < 0.001 + + +def _validate_two_set_zero(two_set: aqudem.TwoSet) -> None: + assert isinstance(two_set, aqudem.TwoSet) + for key in ["tp", "f", "ua", "uo", "m", "oa", "oo", + "tpr", "fr", "uar", "uor", "mr", "oar", "oor"]: + assert getattr(two_set, key) == 0 + assert two_set.d > 0 or two_set.i > 0 + assert not (two_set.d > 0 and two_set.i > 0) + assert two_set.dr > 0 or two_set.ir > 0 + assert not (two_set.dr > 0 and two_set.ir > 0) + + +def _validate_event_analysis(ea: aqudem.EventAnalysis) -> None: + assert isinstance(ea, aqudem.EventAnalysis) + for key in ["d", "f", "fm", "m", "c", "md", "fmd", "fd", "id", + "total_gt_events", "total_det_events", "correct_events_per_log", + "dr", "fr", "fmr", "mr", "cr_gt", "cr_det", "mdr", "fmdr", "fdr", "idr"]: + assert isinstance(getattr(ea, key), (int, float)) + assert (ea.dr + ea.fr + ea.fmr + ea.mr + ea.cr_gt) - 1 < 0.001 + assert (ea.mdr + ea.fmdr + ea.fdr + ea.idr + ea.cr_det) - 1 < 0.001 + assert (ea.d + ea.f + ea.fm + ea.m + (ea.c / 2)) - ea.total_gt_events < 0.001 + assert (ea.md + ea.fd + ea.fmd + ea.id + (ea.c / 2)) - ea.total_det_events < 0.001 + + +def _validate_event_analysis_zero(ea: aqudem.EventAnalysis) -> None: + assert isinstance(ea, aqudem.EventAnalysis) + for key in ["f", "fm", "m", "c", "md", "fmd", "fd", + "correct_events_per_log", + "fr", "fmr", "mr", "cr_gt", "cr_det", "mdr", "fmdr", "fdr"]: + assert getattr(ea, key) == 0 + assert ea.d > 0 or ea.id > 0 + assert not (ea.d > 0 and ea.id > 0) + assert ea.dr > 0 or ea.idr > 0 + assert not (ea.dr > 0 and ea.idr > 0) + assert ea.total_gt_events > 0 or ea.total_det_events > 0 + assert not (ea.total_gt_events > 0 and ea.total_det_events > 0) + + 
+@pytest.fixture(scope="module", name='context') +def fixture_context() -> aqudem.Context: + return aqudem.Context(os.path.join("tests", "resources", "23-03-20_gt_cam.xes"), + os.path.join("tests", "resources", "23-03-20_det_firstlastlowlevel.xes")) + + +def test_context_properties(context: aqudem.Context) -> None: + assert isinstance(context.ground_truth, sf.FrameHE) + assert isinstance(context.detected, sf.FrameHE) + assert context.ground_truth.shape[0] <= 204 + assert context.ground_truth.shape[1] == 6 + for column in ["case:concept:name", "case:sampling_freq", "concept:name", + "lifecycle:transition", "time:timestamp", "concept:instance"]: + assert column in context.ground_truth.columns + assert context.detected.shape[0] <= 78 + assert context.detected.shape[1] == 5 + for column in ["case:concept:name", "concept:name", "lifecycle:transition", + "time:timestamp", "case:sampling_freq"]: + assert column in context.detected.columns + assert isinstance(context.activity_names, dict) + assert isinstance(context.case_ids, dict) + + +def test_cross_correlation(context: aqudem.Context) -> None: + cross_correlation = context.cross_correlation() + assert isinstance(cross_correlation, tuple) + assert isinstance(cross_correlation[0], (float, int)) + assert isinstance(cross_correlation[1], float) + + for act in set(ACTIVIES_GT + ACTIVITIES_DET): + cross_correlation_act = context.cross_correlation(activity_name=act) + assert isinstance(cross_correlation_act, tuple) + assert isinstance(cross_correlation_act[0], (float, int)) + assert isinstance(cross_correlation_act[1], float) + + for cas in set(CASES_GT + CASES_DET): + cross_correlation_case = context.cross_correlation(case_id=cas) + assert isinstance(cross_correlation_case, tuple) + assert isinstance(cross_correlation_case[0], (float, int)) + assert isinstance(cross_correlation_case[1], float) + + for act in set(ACTIVIES_GT + ACTIVITIES_DET): + for cas in set(CASES_GT + CASES_DET): + cross_correlation_act_case = 
context.cross_correlation(activity_name=act, + case_id=cas) + assert isinstance(cross_correlation_act_case, tuple) + assert isinstance(cross_correlation_act_case[0], (float, int)) + assert isinstance(cross_correlation_act_case[1], float) + + # for activities that are only in one log, make sure that they are logically ZERO + for act in act_only_in_one_log: + cross_correlation_act = context.cross_correlation(activity_name=act) + assert cross_correlation_act[0] == 0 + assert cross_correlation_act[1] == 0 + + # for cases that are only in one log, make sure that they are logically ZERO + for cas in case_only_in_one_log: + cross_correlation_case = context.cross_correlation(case_id=cas) + assert cross_correlation_case[0] == 0 + assert cross_correlation_case[1] == 0 + + # for tha case + act query from above, it should also be logically ZERO + for act in act_only_in_one_log: + for cas in case_only_in_one_log: + cross_correlation_act_case = context.cross_correlation(activity_name=act, + case_id=cas) + assert cross_correlation_act_case[0] == 0 + assert cross_correlation_act_case[1] == 0 + + +def test_two_set(context: aqudem.Context) -> None: + two_set = context.two_set() + _validate_two_set(two_set) + + for act in set(ACTIVIES_GT + ACTIVITIES_DET): + two_set_act = context.two_set(activity_name=act) + _validate_two_set(two_set_act) + + for cas in set(CASES_GT + CASES_DET): + two_set_case = context.two_set(case_id=cas) + _validate_two_set(two_set_case) + + for act in set(ACTIVIES_GT + ACTIVITIES_DET): + for cas in set(CASES_GT + CASES_DET): + two_set_act_case = context.two_set(activity_name=act, case_id=cas) + _validate_two_set(two_set_act_case) + + for act in act_only_in_one_log: + two_set_act = context.two_set(activity_name=act) + _validate_two_set_zero(two_set_act) + + for cas in case_only_in_one_log: + two_set_case = context.two_set(case_id=cas) + _validate_two_set_zero(two_set_case) + + for act in act_only_in_one_log: + for cas in case_only_in_one_log: + two_set_act_case = 
context.two_set(activity_name=act, case_id=cas) + _validate_two_set_zero(two_set_act_case) + + +def test_event_analysis(context: aqudem.Context) -> None: + ea = context.event_analysis() + _validate_event_analysis(ea) + + for act in set(ACTIVIES_GT + ACTIVITIES_DET): + ea_act = context.event_analysis(activity_name=act) + _validate_event_analysis(ea_act) + + for cas in set(CASES_GT + CASES_DET): + ea_case = context.event_analysis(case_id=cas) + _validate_event_analysis(ea_case) + + for act in set(ACTIVIES_GT + ACTIVITIES_DET): + for cas in set(CASES_GT + CASES_DET): + ea_act_case = context.event_analysis(activity_name=act, case_id=cas) + _validate_event_analysis(ea_act_case) + + for act in act_only_in_one_log: + ea_act = context.event_analysis(activity_name=act) + _validate_event_analysis_zero(ea_act) + + for cas in case_only_in_one_log: + ea_case = context.event_analysis(case_id=cas) + _validate_event_analysis_zero(ea_case) + + for act in act_only_in_one_log: + for cas in case_only_in_one_log: + ea_act_case = context.event_analysis(activity_name=act, case_id=cas) + _validate_event_analysis_zero(ea_act_case) + + +def test_damerau_levenshtein_distance(context: aqudem.Context) -> None: + dld = context.damerau_levenshtein_distance() + assert isinstance(dld[0], (int, float)) + assert isinstance(dld[1], float) + assert dld[1] <= 1 + + for cas in set(CASES_GT + CASES_DET): + dld_case = context.damerau_levenshtein_distance(case_id=cas) + assert isinstance(dld_case[0], (int, float)) + assert isinstance(dld_case[1], float) + assert dld_case[1] <= 1 + + for cas in case_only_in_one_log: + dld_case = context.damerau_levenshtein_distance(case_id=cas) + assert dld_case[0] > 0 + assert dld_case[1] > 0 + + +def test_levenshtein_distance(context: aqudem.Context) -> None: + ld = context.levenshtein_distance() + assert isinstance(ld[0], (int, float)) + assert isinstance(ld[1], float) + assert ld[1] <= 1 + + for cas in set(CASES_GT + CASES_DET): + ld_case = 
context.levenshtein_distance(case_id=cas) + assert isinstance(ld_case[0], (int, float)) + assert isinstance(ld_case[1], float) + assert ld_case[1] <= 1 + + for cas in case_only_in_one_log: + ld_case = context.levenshtein_distance(case_id=cas) + assert ld_case[0] > 0 + assert ld_case[1] > 0 + + +def test_different_ordering_xes_same_result(context: aqudem.Context) -> None: + context2 = aqudem.Context(os.path.join("tests", + "resources", + "23-03-20_gt_cam_ooo.xes"), + os.path.join("tests", + "resources", + "23-03-20_det_firstlastlowlevel_ooo.xes")) + assert context.activity_names == context2.activity_names + assert context.case_ids == context2.case_ids + assert context.cross_correlation() == context2.cross_correlation() + assert context.two_set() == context2.two_set() + assert context.event_analysis() == context2.event_analysis() + assert context.damerau_levenshtein_distance() == context2.damerau_levenshtein_distance() + assert context.levenshtein_distance() == context2.levenshtein_distance() + + +def test_wrong_case_or_activity_raises_exception(context: aqudem.Context) -> None: + with pytest.raises(ValueError): + context.two_set(activity_name="wrong_activity_name") + with pytest.raises(ValueError): + context.two_set(case_id="wrong_case_id") diff --git a/package/tests/test_event_analysis_helper.py b/package/tests/test_event_analysis_helper.py new file mode 100644 index 0000000..3e6fdc3 --- /dev/null +++ b/package/tests/test_event_analysis_helper.py @@ -0,0 +1,525 @@ +"""Tests for the event_analysis_helper module.""" +import os +import aqudem +from aqudem.event_analysis_helper import (_event_analysis, EventAnalysis, + _event_analysis_by_activity_case) +from .mocks.logs import (ground_truth_ten_eighteen, detected_ten_eighteen, + ground_truth_ten_eleven, detected_ten_eleven, + ground_truth_mixed_activity, detected_mixed_activity, + ground_truth_mixed_case, detected_mixed_case, + start_end_series_ten_eleven, start_end_series_ten_eighteen, + 
start_end_series_mixed_activity, start_end_series_mixed_case) + + +def _verify_ten_eleven_metrics(result: EventAnalysis) -> None: + """ Verify the metrics for the 10-11 logs.""" + assert isinstance(result, EventAnalysis) + assert result.d == 0 + assert result.f == 1 + assert result.fm == 0 + assert result.m == 0 + assert result.c == 4 + assert result.md == 0 + assert result.fmd == 0 + assert result.fd == 2 + assert result.id == 1 + assert result.total_gt_events == 3 + assert result.total_det_events == 5 + + +def _verify_ten_eleven_rates(result: EventAnalysis) -> None: + """ Verify the rates for the 10-11 logs.""" + assert isinstance(result, EventAnalysis) + assert result.dr == 0.0 + assert result.fr == 0.3333 + assert result.fmr == 0.0 + assert result.mr == 0.0 + assert result.cr_gt == 0.6667 + assert result.mdr == 0.0 + assert result.fdr == 0.4 + assert result.idr == 0.2 + assert result.cr_det == 0.4 + + +def _verify_ten_eighteen_metrics(result: EventAnalysis) -> None: + """ Verify the metrics for the 10-18 logs.""" + assert isinstance(result, EventAnalysis) + assert result.d == 0 + assert result.f == 2 + assert result.fm == 0 + assert result.m == 0 + assert result.c == 2 + assert result.md == 0 + assert result.fmd == 0 + assert result.fd == 4 + assert result.id == 0 + assert result.total_gt_events == 3 + assert result.total_det_events == 5 + + +def _verify_ten_eighteen_rates(result: EventAnalysis) -> None: + """ Verify the rates for the 10-18 logs.""" + assert isinstance(result, EventAnalysis) + assert result.dr == 0.0 + assert result.fr == 0.6667 + assert result.fmr == 0.0 + assert result.mr == 0.0 + assert result.cr_gt == 0.3333 + assert result.mdr == 0.0 + assert result.fdr == 0.8 + assert result.idr == 0.0 + assert result.cr_det == 0.2 + + +def test_event_analysis_ten_eleven() -> None: + """ Test the event analysis for the 10-11 logs.""" + result = _event_analysis(ground_truth_ten_eleven, + detected_ten_eleven, + start_end_series_ten_eleven) + 
_verify_ten_eleven_metrics(result) + + +def test_event_analysis_rates_ten_eleven() -> None: + """ Test the event analysis rates for the 10-11 logs.""" + result = _event_analysis(ground_truth_ten_eleven, + detected_ten_eleven, + start_end_series_ten_eleven) + _verify_ten_eleven_rates(result) + + +def test_event_analysis_ten_eighteen() -> None: + result = _event_analysis(ground_truth_ten_eighteen, + detected_ten_eighteen, + start_end_series_ten_eighteen) + _verify_ten_eighteen_metrics(result) + + +def test_event_analysis_rates_ten_eighteen() -> None: + result = _event_analysis(ground_truth_ten_eighteen, + detected_ten_eighteen, + start_end_series_ten_eighteen) + _verify_ten_eighteen_rates(result) + + +def test_event_analysis_by_activity() -> None: + result = _event_analysis_by_activity_case(ground_truth_mixed_activity, + detected_mixed_activity, + "A", + "1", + start_end_series_mixed_activity) + _verify_ten_eleven_metrics(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_activity, + detected_mixed_activity, + "B", + "1", + start_end_series_mixed_activity) + _verify_ten_eighteen_metrics(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_activity, + detected_mixed_activity, + "*", + "1", + start_end_series_mixed_activity) + assert isinstance(result, EventAnalysis) + assert result.d == 0 + assert result.f == 1.5 + assert result.fm == 0 + assert result.m == 0 + assert result.c == 3 + assert result.md == 0 + assert result.fmd == 0 + assert result.fd == 3 + assert result.id == 0.5 + assert result.total_gt_events == 3 + assert result.total_det_events == 5 + + +def test_event_analysis_rates_by_activity() -> None: + result = _event_analysis_by_activity_case(ground_truth_mixed_activity, + detected_mixed_activity, + "A", + "1", + start_end_series_mixed_activity) + _verify_ten_eleven_rates(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_activity, + detected_mixed_activity, + "B", + "1", + 
start_end_series_mixed_activity) + _verify_ten_eighteen_rates(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_activity, + detected_mixed_activity, + "*", + "1", + start_end_series_mixed_activity) + assert isinstance(result, EventAnalysis) + assert result.dr == 0.0 + assert result.fr == 0.5 + assert result.fmr == 0.0 + assert result.mr == 0.0 + assert result.cr_gt == 0.5 + assert result.mdr == 0.0 + assert result.fdr == 0.6 + assert result.idr == 0.1 + assert result.cr_det == 0.3 + + +def test_event_analysis_by_activity_case() -> None: + result = _event_analysis_by_activity_case(ground_truth_mixed_case, + detected_mixed_case, + "*", + "1", + start_end_series_mixed_case) + _verify_ten_eleven_metrics(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_case, + detected_mixed_case, + "*", + "2", + start_end_series_mixed_case) + _verify_ten_eighteen_metrics(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_case, + detected_mixed_case, + "*", + "*", + start_end_series_mixed_case) + assert isinstance(result, EventAnalysis) + assert result.d == 0 + assert result.f == 1.5 + assert result.fm == 0 + assert result.m == 0 + assert result.c == 3 + assert result.md == 0 + assert result.fmd == 0 + assert result.fd == 3 + assert result.id == 0.5 + assert result.total_gt_events == 3 + assert result.total_det_events == 5 + + +def test_event_analysis_rates_by_activity_case() -> None: + result = _event_analysis_by_activity_case(ground_truth_mixed_case, + detected_mixed_case, + "*", + "1", + start_end_series_mixed_case) + _verify_ten_eleven_rates(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_case, + detected_mixed_case, + "*", + "2", + start_end_series_mixed_case) + _verify_ten_eighteen_rates(result) + result = _event_analysis_by_activity_case(ground_truth_mixed_case, + detected_mixed_case, + "*", + "*", + start_end_series_mixed_case) + assert isinstance(result, EventAnalysis) + assert result.dr == 
0.0 + assert result.fr == 0.5 + assert result.fmr == 0.0 + assert result.mr == 0.0 + assert result.cr_gt == 0.5 + assert result.mdr == 0.0 + assert result.fdr == 0.6 + assert result.idr == 0.1 + assert result.cr_det == 0.3 + + +def test_context_ea_1_a() -> None: + """Test EA metrics that are exposed in the context class for trace 1, activity A.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity A", case_id="ExampleTrace1") + assert isinstance(res, EventAnalysis) + assert res.d == 0 + assert res.f == 1 + assert res.fm == 0 + assert res.m == 0 + assert res.c == 2 + assert res.md == 0 + assert res.fmd == 0 + assert res.fd == 2 + assert res.id == 0 + + +def test_context_ea_1_b() -> None: + """Test EA metrics that are exposed in the context class for trace 1, activity B.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity B", case_id="ExampleTrace1") + assert isinstance(res, EventAnalysis) + assert res.d == 0 + assert res.f == 0 + assert res.fm == 0 + assert res.m == 2 + assert res.c == 0 + assert res.md == 1 + assert res.fmd == 0 + assert res.fd == 0 + assert res.id == 0 + + +def test_context_ea_1_c() -> None: + """Test EA metrics that are exposed in the context class for trace 1, activity C.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity C", case_id="ExampleTrace1") + assert isinstance(res, EventAnalysis) + assert res.d == 1 + assert res.f == 0 + assert res.fm == 0 + assert res.m == 0 + assert res.c == 2 + assert res.md == 0 + assert res.fmd == 0 + assert res.fd == 0 + assert res.id == 0 + + +def test_context_ea_2_a() -> None: + """Test EA 
metrics that are exposed in the context class for trace 2, activity A.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity A", case_id="ExampleTrace2") + assert isinstance(res, EventAnalysis) + assert res.d == 1 + assert res.f == 0 + assert res.fm == 0 + assert res.m == 0 + assert res.c == 0 + assert res.md == 0 + assert res.fmd == 0 + assert res.fd == 0 + assert res.id == 1 + + +def test_context_ea_2_b() -> None: + """Test EA metrics that are exposed in the context class for trace 2, activity B.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity B", case_id="ExampleTrace2") + assert isinstance(res, EventAnalysis) + assert res.d == 0 + assert res.f == 0 + assert res.fm == 1 + assert res.m == 1 + assert res.c == 0 + assert res.md == 0 + assert res.fmd == 1 + assert res.fd == 1 + assert res.id == 0 + + +def test_context_ea_rates_1_a() -> None: + """Test EA rates that are exposed in the context class for trace 1, activity A.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity A", case_id="ExampleTrace1") + assert isinstance(res, EventAnalysis) + assert res.dr == 0.0 + assert res.fr == 0.5 + assert res.fmr == 0.0 + assert res.mr == 0.0 + assert res.cr_gt == 0.5 + assert res.cr_det == round(1 / 3, 4) + assert res.mdr == 0.0 + assert res.fmdr == 0.0 + assert res.fdr == round(2 / 3, 4) + assert res.idr == 0.0 + + +def test_context_ea_rates_1_b() -> None: + """Test EA rates that are exposed in the context class for trace 1, activity B.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", 
"resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity B", case_id="ExampleTrace1") + assert isinstance(res, EventAnalysis) + assert res.dr == 0.0 + assert res.fr == 0.0 + assert res.fmr == 0.0 + assert res.mr == 1.0 + assert res.cr_gt == 0.0 + assert res.cr_det == 0.0 + assert res.mdr == 1.0 + assert res.fmdr == 0.0 + assert res.fdr == 0.0 + assert res.idr == 0.0 + + +def test_context_ea_rates_1_c() -> None: + """Test EA rates that are exposed in the context class for trace 1, activity C.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity C", case_id="ExampleTrace1") + assert isinstance(res, EventAnalysis) + assert res.dr == 0.5 + assert res.fr == 0.0 + assert res.fmr == 0.0 + assert res.mr == 0.0 + assert res.cr_gt == 0.5 + assert res.cr_det == 1.0 + assert res.mdr == 0.0 + assert res.fmdr == 0.0 + assert res.fdr == 0.0 + assert res.idr == 0.0 + + +def test_context_ea_rates_2_a() -> None: + """Test EA rates that are exposed in the context class for trace 2, activity A.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity A", case_id="ExampleTrace2") + assert isinstance(res, EventAnalysis) + assert res.dr == 1.0 + assert res.fr == 0.0 + assert res.fmr == 0.0 + assert res.mr == 0.0 + assert res.cr_gt == 0.0 + assert res.cr_det == 0.0 + assert res.mdr == 0.0 + assert res.fmdr == 0.0 + assert res.fdr == 0.0 + assert res.idr == 1.0 + + +def test_context_ea_rates_2_b() -> None: + """Test EA rates that are exposed in the context class for trace 2, activity B.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity B", 
case_id="ExampleTrace2") + assert isinstance(res, EventAnalysis) + assert res.dr == 0.0 + assert res.fr == 0.0 + assert res.fmr == 0.5 + assert res.mr == 0.5 + assert res.cr_gt == 0.0 + assert res.cr_det == 0.0 + assert res.mdr == 0.0 + assert res.fmdr == 0.5 + assert res.fdr == 0.5 + assert res.idr == 0.0 + + +def test_context_ea_by_activity() -> None: + """Test EA by activity that is exposed in the context class.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity A") + assert isinstance(res, EventAnalysis) + assert res.d == round((0 + 1) / 2, 4) + assert res.f == round((1 + 0) / 2, 4) + assert res.fm == round((0 + 0) / 2, 4) + assert res.m == round((0 + 0) / 2, 4) + assert res.c == round((2 + 0) / 2, 4) + assert res.md == round((0 + 0) / 2, 4) + assert res.fmd == round((0 + 0) / 2, 4) + assert res.fd == round((2 + 0) / 2, 4) + assert res.id == round((0 + 1) / 2, 4) + assert res.total_gt_events == 1.5 + assert res.total_det_events == 2 + res = context.event_analysis(activity_name="Activity B") + assert isinstance(res, EventAnalysis) + assert res.d == round((0 + 0) / 2, 4) + assert res.f == round((0 + 0) / 2, 4) + assert res.fm == round((0 + 1) / 2, 4) + assert res.m == round((2 + 1) / 2, 4) + assert res.c == round((0 + 0) / 2, 4) + assert res.md == round((0 + 1) / 2, 4) + assert res.fmd == round((0 + 1) / 2, 4) + assert res.fd == round((0 + 1) / 2, 4) + assert res.id == round((0 + 0) / 2, 4) + assert res.total_gt_events == 2 + assert res.total_det_events == 1.5 + res = context.event_analysis(activity_name="Activity C") + assert isinstance(res, EventAnalysis) + assert res.d == 1 + assert res.f == 0 + assert res.fm == 0 + assert res.m == 0 + assert res.c == 2 + assert res.md == 0 + assert res.fmd == 0 + assert res.fd == 0 + assert res.id == 0 + assert res.total_gt_events == 2 + assert res.total_det_events == 1 + + +def 
test_context_ea_by_activity_case() -> None: + """Test EA by activity and case that is exposed in the context class.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis() + assert isinstance(res, EventAnalysis) + assert res.d == round((0 + 0 + 1 + 1 + 0) / 5, 4) + assert res.f == round((1 + 0 + 0 + 0 + 0) / 5, 4) + assert res.fm == round((0 + 0 + 0 + 0 + 1) / 5, 4) + assert res.m == round((0 + 2 + 0 + 0 + 1) / 5, 4) + assert res.c == round((2 + 0 + 2 + 0 + 0) / 5, 4) + assert res.md == round((0 + 1 + 0 + 0 + 0) / 5, 4) + assert res.fmd == round((0 + 0 + 0 + 0 + 1) / 5, 4) + assert res.fd == round((2 + 0 + 0 + 0 + 1) / 5, 4) + assert res.id == round((0 + 0 + 0 + 0 + 1) / 5, 4) + assert res.total_gt_events == 1.8 + assert res.total_det_events == 1.6 + + +def test_context_ea_rates_by_activity() -> None: + """Test EA rates by activity that is exposed in the context class, aggregated by activity.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis(activity_name="Activity A") + assert isinstance(res, EventAnalysis) + assert res.dr == 0.3333 + assert res.fr == 0.3333 + assert res.fmr == 0 + assert res.mr == 0 + assert res.cr_gt == 0.3333 + assert res.cr_det == 0.25 + assert res.mdr == 0 + assert res.fmdr == 0 + assert res.fdr == 0.5 + assert res.idr == 0.25 + res = context.event_analysis(activity_name="Activity B") + assert isinstance(res, EventAnalysis) + assert res.dr == 0 + assert res.fr == 0 + assert res.fmr == 0.25 + assert res.mr == 0.75 + assert res.cr_gt == 0 + assert res.cr_det == 0 + assert res.mdr == 0.3333 + assert res.fmdr == 0.3333 + assert res.fdr == 0.3333 + assert res.idr == 0 + res = context.event_analysis(activity_name="Activity C") + assert isinstance(res, EventAnalysis) + assert res.dr == 0.5 + assert res.fr == 0.0 + 
assert res.fmr == 0.0 + assert res.mr == 0.0 + assert res.cr_gt == 0.5 + assert res.cr_det == 1.0 + assert res.mdr == 0.0 + assert res.fmdr == 0.0 + assert res.fdr == 0.0 + assert res.idr == 0.0 + + +def test_context_ea_rates_by_activity_case() -> None: + """Test EA rates by activity and case that is exposed in the context class, + aggregated by activity and case.""" + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.event_analysis() + assert isinstance(res, EventAnalysis) + assert res.dr == 0.2222 + assert res.fr == 0.1111 + assert res.fmr == 0.1111 + assert res.mr == 0.3333 + assert res.cr_gt == 0.2222 + assert res.cr_det == 0.25 + assert res.mdr == 0.125 + assert res.fmdr == 0.125 + assert res.fdr == 0.375 + assert res.idr == 0.125 diff --git a/package/tests/test_two_set_helper.py b/package/tests/test_two_set_helper.py new file mode 100644 index 0000000..e97d0bf --- /dev/null +++ b/package/tests/test_two_set_helper.py @@ -0,0 +1,591 @@ +"""Tests for the two_set_helper module.""" +import os +from dataclasses import fields +import aqudem +from aqudem.two_set_helper import (_two_set, TwoSet, _two_set_by_activity_case) +from .mocks.logs import (ground_truth_ten_eighteen, detected_ten_eighteen, + ground_truth_ten_eleven, detected_ten_eleven, + start_end_series_ten_eleven, start_end_series_ten_eighteen) + +rate_props = ["uar", "oar", "tpr", "fr", "uor", "tnr", "oor", "ir", "dr", "mr"] + + +def verify_single_act_rates_ten_eighteen(result: TwoSet) -> None: + assert result.uar == round(900 / 21601, 4) + assert result.oar == round(900 / 7200, 4) + assert result.tpr == round(13500 / 21601, 4) + assert result.fr == round(3600 / 21601, 4) + assert result.oor == round(3600 / 7200, 4) + assert result.tnr == round(2700 / 7200, 4) + assert result.uor == round(3601 / 21601, 4) + assert result.ir == 0.0 + assert result.dr == 0.0 + assert result.mr == 0.0 + sum_metr = 0.0 + for 
field in rate_props: + sum_metr += getattr(result, field) + assert abs(sum_metr - 2.0) < 0.001 + + +def verify_single_act_metrics_ten_eighteen(result: TwoSet) -> None: + assert isinstance(result, TwoSet) + assert result.ua == 900 + assert result.tp == 13500 + assert result.f == 3600 + assert result.oo == 3600 + assert result.oa == 900 + assert result.tn == 2700 + assert result.uo == 3601 + assert result.i == 0 + assert result.d == 0 + assert result.m == 0 + assert result.p == 21601 + assert result.n == 7200 + sum_metr = 0 + for field in fields(result): + sum_metr += getattr(result, field.name) + assert sum_metr == 28801 + + +def verify_two_set_metrics_ten_eleven(result: TwoSet) -> None: + assert isinstance(result, TwoSet) + assert result.oa == 600 + assert result.tp == 1080 + assert result.f == 300 + assert result.uo == 300 + assert result.tn == 540 + assert result.oo == 421 + assert result.ua == 300 + assert result.i == 60 + assert result.d == 0 + assert result.m == 0 + assert result.t == 3601 + assert result.p == 1980 + assert result.n == 1621 + + +def verify_two_set_rates_ten_eleven(result: TwoSet) -> None: + assert isinstance(result, TwoSet) + assert result.oar == round(600 / 1621, 4) + assert result.tpr == round(1080 / 1980, 4) + assert result.fr == round(300 / 1980, 4) + assert result.uor == round(300 / 1980, 4) + assert result.tnr == round(540 / 1621, 4) + assert result.oor == round(421 / 1621, 4) + assert result.uar == round(300 / 1980, 4) + assert result.ir == round(60 / 1621, 4) + assert result.dr == 0.0 + assert result.mr == 0.0 + + +def verify_rates_avg_ten_eleven_eighteen(result: TwoSet) -> None: + assert isinstance(result, TwoSet) + assert result.uar == 0.0572 + assert result.oar == 0.0989 + assert result.tpr == 0.3843 + assert result.fr == 0.1041 + assert result.uor == 0.1041 + assert result.tnr == 0.1218 + assert result.oor == 0.121 + assert result.ir == 0.0083 + assert result.dr == 0.0 + assert result.mr == 0.0 + sum_metr = 0.0 + for field in 
rate_props: + sum_metr += getattr(result, field) + assert abs(sum_metr - 2.0) < 0.001 + + +def verify_metrics_avg_ten_eleven_eighteen(result: TwoSet) -> None: + assert isinstance(result, TwoSet) + assert result.ua == 600 + assert result.tp == 7290 + assert result.f == 1950 + assert result.oo == 2010 + assert result.oa == 750 + assert result.tn == 1620 + assert result.uo == 1950 + assert result.i == 30 + assert result.d == 0 + assert result.m == 0 + + +def test_two_set() -> None: + result = _two_set(ground_truth_ten_eighteen, + detected_ten_eighteen, + start_end_series_ten_eighteen, + 1) + verify_single_act_metrics_ten_eighteen(result) + + result = _two_set(ground_truth_ten_eleven, + detected_ten_eleven, + start_end_series_ten_eleven, + 1) + verify_two_set_metrics_ten_eleven(result) + + +def test_two_set_rates() -> None: + result = _two_set(ground_truth_ten_eighteen, + detected_ten_eighteen, + start_end_series_ten_eighteen, + 1) + verify_single_act_rates_ten_eighteen(result) + + result = _two_set(ground_truth_ten_eleven, + detected_ten_eleven, + start_end_series_ten_eleven, + 1) + verify_two_set_rates_ten_eleven(result) + + +def test_two_set_two_hertz() -> None: + result = _two_set(ground_truth_ten_eleven, + detected_ten_eleven, + start_end_series_ten_eleven, + 2) + assert isinstance(result, TwoSet) + assert result.oa == 1200 + assert result.tp == 2160 + assert result.f == 600 + assert result.uo == 600 + assert result.tn == 1080 + assert result.oo == 841 + assert result.ua == 600 + assert result.i == 120 + assert result.d == 0 + assert result.m == 0 + assert result.t == 7201 + assert result.p == 3960 + assert result.n == 3241 + + +def test_two_set_rates_two_hertz() -> None: + result = _two_set(ground_truth_ten_eleven, + detected_ten_eleven, + start_end_series_ten_eleven, + 2) + assert isinstance(result, TwoSet) + assert result.oar == round(1200 / 3241, 4) + assert result.tpr == round(2160 / 3960, 4) + assert result.fr == round(600 / 3960, 4) + assert result.uor == 
round(600 / 3960, 4) + assert result.tnr == round(1080 / 3241, 4) + assert result.oor == round(841 / 3241, 4) + assert result.uar == round(600 / 3960, 4) + assert result.ir == round(120 / 3241, 4) + assert result.dr == 0.0 + assert result.mr == 0.0 + + +def test_two_set_by_activity_only_one_act_in_log() -> None: + result_a = _two_set_by_activity_case(ground_truth_ten_eighteen, + detected_ten_eighteen, + 1, + "*", + "1", + start_end_series_ten_eighteen) + verify_single_act_metrics_ten_eighteen(result_a) + + +def test_two_set_rates_by_activity_only_one_act_in_log() -> None: + result_a = _two_set_by_activity_case(ground_truth_ten_eighteen, + detected_ten_eighteen, + 1, + "*", + "1", + start_end_series_ten_eighteen) + verify_single_act_rates_ten_eighteen(result_a) + + +# pylint: disable=too-many-statements +def test_two_set_via_context_one_case_one_activity() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity A", case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tp == 780 + assert res.d == 0 + assert res.f == 60 + assert res.ua == 0 + assert res.uo == 120 + assert res.tn == 2281 + assert res.i == 0 + assert res.m == 0 + assert res.oa == 0 + assert res.oo == 300 + assert res.p == 960 + assert res.n == 2581 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity B", case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tp == 390 + assert res.d == 0 + assert res.f == 0 + assert res.ua == 0 + assert res.uo == 0 + assert res.tn == 3001 + assert res.i == 0 + assert res.m == 30 + assert res.oa == 120 + assert res.oo == 0 + assert res.p == 390 + assert res.n == 3151 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + 
os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity C", case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tp == 421 + assert res.d == 240 + assert res.f == 0 + assert res.ua == 120 + assert res.uo == 0 + assert res.tn == 2760 + assert res.i == 0 + assert res.m == 0 + assert res.oa == 0 + assert res.oo == 0 + assert res.p == 781 + assert res.n == 2760 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity A", case_id="ExampleTrace2") + assert isinstance(res, TwoSet) + assert res.tp == 0 + assert res.d == 120 + assert res.f == 0 + assert res.ua == 0 + assert res.uo == 0 + assert res.tn == 241 + assert res.i == 180 + assert res.m == 0 + assert res.oa == 0 + assert res.oo == 0 + assert res.p == 120 + assert res.n == 421 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity B", case_id="ExampleTrace2") + assert isinstance(res, TwoSet) + assert res.tp == 300 + assert res.d == 0 + assert res.f == 60 + assert res.ua == 0 + assert res.uo == 61 + assert res.tn == 0 + assert res.i == 0 + assert res.m == 60 + assert res.oa == 60 + assert res.oo == 0 + assert res.p == 421 + assert res.n == 120 + + +# pylint: disable=too-many-statements +def test_two_set_rates_via_context_one_case_one_activity() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity A", case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tpr == round(780 / 960, 4) + assert res.dr == 0.0 + assert res.fr == round(60 / 960, 4) + assert res.uar == 0.0 + assert res.uor == round(120 / 960, 4) + assert res.tnr == round(2281 / 2581, 4) + 
assert res.ir == 0.0 + assert res.mr == 0.0 + assert res.oar == 0.0 + assert res.oor == round(300 / 2581, 4) + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity B", case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tpr == round(390 / 390, 4) + assert res.dr == 0.0 + assert res.fr == 0.0 + assert res.uar == 0.0 + assert res.uor == 0.0 + assert res.tnr == round(3001 / 3151, 4) + assert res.ir == 0.0 + assert res.mr == round(30 / 3151, 4) + assert res.oar == round(120 / 3151, 4) + assert res.oor == 0.0 + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity C", case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tpr == round(421 / 781, 4) + assert res.dr == round(240 / 781, 4) + assert res.fr == 0.0 + assert res.uar == round(120 / 781, 4) + assert res.uor == 0.0 + assert res.tnr == round(2760 / 2760, 4) + assert res.ir == 0.0 + assert res.mr == 0.0 + assert res.oar == 0.0 + assert res.oor == 0.0 + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity A", case_id="ExampleTrace2") + assert isinstance(res, TwoSet) + assert res.tpr == 0.0 + assert res.dr == round(120 / 120, 4) + assert res.fr == 0.0 + assert res.uar == 0.0 + assert res.uor == 0.0 + assert res.tnr == round(241 / 421, 4) + assert res.ir == round(180 / 421, 
4) + assert res.mr == 0.0 + assert res.oar == 0.0 + assert res.oor == 0.0 + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity B", case_id="ExampleTrace2") + assert isinstance(res, TwoSet) + assert res.tpr == round(300 / 421, 4) + assert res.dr == 0.0 + assert res.fr == round(60 / 421, 4) + assert res.uar == 0.0 + assert res.uor == round(61 / 421, 4) + assert res.tnr == 0.0 + assert res.ir == 0.0 + assert res.mr == round(60 / 120, 4) + assert res.oar == round(60 / 120, 4) + assert res.oor == 0.0 + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + +def test_context_two_set_by_activity() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity A") + assert isinstance(res, TwoSet) + assert res.tp == round((780 + 0) / 2, 4) + assert res.d == round((0 + 120) / 2, 4) + assert res.f == round((60 + 0) / 2, 4) + assert res.ua == round((0 + 0) / 2, 4) + assert res.uo == round((120 + 0) / 2, 4) + assert res.tn == round((2281 + 241) / 2, 4) + assert res.i == round((0 + 180) / 2, 4) + assert res.m == round((0 + 0) / 2, 4) + assert res.oa == round((0 + 0) / 2, 4) + assert res.oo == round((300 + 0) / 2, 4) + assert res.p == round((960 + 120) / 2, 4) + assert res.n == round((2581 + 421) / 2, 4) + res = context.two_set(activity_name="Activity B") + assert isinstance(res, TwoSet) + assert res.tp == round((390 + 300) / 2, 4) + assert res.d == round((0 + 0) / 2, 4) + assert res.f == round((0 + 60) / 2, 4) + assert res.ua == round((0 + 0) / 2, 4) + assert res.uo == round((0 + 61) / 2, 4) + assert res.tn == round((3001 + 0) / 2, 4) + assert res.i == 
round((0 + 0) / 2, 4) + assert res.m == round((30 + 60) / 2, 4) + assert res.oa == round((120 + 60) / 2, 4) + assert res.oo == round((0 + 0) / 2, 4) + assert res.p == round((390 + 421) / 2, 4) + assert res.n == round((3151 + 120) / 2, 4) + res = context.two_set(activity_name="Activity C") + assert isinstance(res, TwoSet) + assert res.tp == 421 + assert res.d == 240 + assert res.f == 0 + assert res.ua == 120 + assert res.uo == 0 + assert res.tn == 2760 + assert res.i == 0 + assert res.m == 0 + assert res.oa == 0 + assert res.oo == 0 + assert res.p == 781 + assert res.n == 2760 + + +def test_context_two_set_by_case() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tp == round((780 + 390 + 421) / 3, 4) + assert res.d == round((0 + 0 + 240) / 3, 4) + assert res.f == round((60 + 0 + 0) / 3, 4) + assert res.ua == round((0 + 0 + 120) / 3, 4) + assert res.uo == round((120 + 0 + 0) / 3, 4) + assert res.tn == round((2281 + 3001 + 2760) / 3, 4) + assert res.i == round((0 + 0 + 0) / 3, 4) + assert res.m == round((0 + 30 + 0) / 3, 4) + assert res.oa == round((0 + 120 + 0) / 3, 4) + assert res.oo == round((300 + 0 + 0) / 3, 4) + assert res.p == round((960 + 390 + 781) / 3, 4) + assert res.n == round((2581 + 3151 + 2760) / 3, 4) + res = context.two_set(case_id="ExampleTrace2") + assert isinstance(res, TwoSet) + assert res.tp == round((0 + 300) / 2, 4) + assert res.d == round((120 + 0) / 2, 4) + assert res.f == round((0 + 60) / 2, 4) + assert res.ua == round((0 + 0) / 2, 4) + assert res.uo == round((0 + 61) / 2, 4) + assert res.tn == round((241 + 0) / 2, 4) + assert res.i == round((180 + 0) / 2, 4) + assert res.m == round((0 + 60) / 2, 4) + assert res.oa == round((0 + 60) / 2, 4) + assert res.oo == round((0 + 0) / 2, 4) + + +def test_context_two_set_by_activity_case() -> None: + context = 
aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set() + assert isinstance(res, TwoSet) + assert res.tp == round((780 + 390 + 421 + 0 + 300) / 5, 4) + assert res.d == round((0 + 0 + 240 + 120 + 0) / 5, 4) + assert res.f == round((60 + 0 + 0 + 0 + 60) / 5, 4) + assert res.ua == round((0 + 0 + 120 + 0 + 0) / 5, 4) + assert res.uo == round((120 + 0 + 0 + 0 + 61) / 5, 4) + assert res.tn == round((2281 + 3001 + 2760 + 241 + 0) / 5, 4) + assert res.i == round((0 + 0 + 0 + 180 + 0) / 5, 4) + assert res.m == round((0 + 30 + 0 + 0 + 60) / 5, 4) + assert res.oa == round((0 + 120 + 0 + 0 + 60) / 5, 4) + assert res.oo == round((300 + 0 + 0 + 0 + 0) / 5, 4) + assert res.p == round((960 + 390 + 781 + 120 + 421) / 5, 4) + assert res.n == round((2581 + 3151 + 2760 + 421 + 120) / 5, 4) + + +def test_context_two_set_rates_by_activity() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(activity_name="Activity A") + assert isinstance(res, TwoSet) + assert res.tpr == round((780 + 0) / (960 + 120), 4) + assert res.dr == round((0 + 120) / (960 + 120), 4) + assert res.fr == round((60 + 0) / (960 + 120), 4) + assert res.uar == round((0 + 0) / (960 + 120), 4) + assert res.uor == round((120 + 0) / (960 + 120), 4) + assert res.tnr == round((2281 + 241) / (2581 + 421), 4) + assert res.ir == round((0 + 180) / (2581 + 421), 4) + assert res.mr == round((0 + 0) / (2581 + 421), 4) + assert res.oar == round((0 + 0) / (2581 + 421), 4) + assert res.oor == round((300 + 0) / (2581 + 421), 4) + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + res = context.two_set(activity_name="Activity B") + assert isinstance(res, TwoSet) + assert res.tpr == round((390 + 300) / (390 + 421), 4) + assert res.dr == round((0 + 0) / (390 + 421), 4) + 
assert res.fr == round((0 + 60) / (390 + 421), 4) + assert res.uar == round((0 + 0) / (390 + 421), 4) + assert res.uor == round((0 + 61) / (390 + 421), 4) + assert res.tnr == round((3001 + 0) / (3151 + 120), 4) + assert res.ir == round((0 + 0) / (3151 + 120), 4) + assert res.mr == round((30 + 60) / (3151 + 120), 4) + assert res.oar == round((120 + 60) / (3151 + 120), 4) + assert res.oor == round((0 + 0) / (3151 + 120), 4) + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + res = context.two_set(activity_name="Activity C") + assert isinstance(res, TwoSet) + assert res.tpr == round(421 / 781, 4) + assert res.dr == round(240 / 781, 4) + assert res.fr == 0.0 + assert res.uar == round(120 / 781, 4) + assert res.uor == 0.0 + assert res.tnr == round(2760 / 2760, 4) + assert res.ir == 0.0 + assert res.mr == 0.0 + assert res.oar == 0.0 + assert res.oor == 0.0 + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + +def test_context_two_set_rates_by_case() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set(case_id="ExampleTrace1") + assert isinstance(res, TwoSet) + assert res.tpr == round((780 + 390 + 421) / (960 + 390 + 781), 4) + assert res.dr == round((0 + 0 + 240) / (960 + 390 + 781), 4) + assert res.fr == round((60 + 0 + 0) / (960 + 390 + 781), 4) + assert res.uar == round((0 + 0 + 120) / (960 + 390 + 781), 4) + assert res.uor == round((120 + 0 + 0) / (960 + 390 + 781), 4) + assert res.tnr == round((2281 + 3001 + 2760) / (2581 + 3151 + 2760), 4) + assert res.ir == round((0 + 0 + 0) / (2581 + 3151 + 2760), 4) + assert res.mr == round((0 + 30 + 0) / (2581 + 3151 + 2760), 4) + assert res.oar == round((0 + 120 + 0) / (2581 + 3151 + 2760), 4) + assert res.oor == round((300 + 0 + 0) / (2581 + 3151 + 2760), 4) + summed = 0.0 + for field in 
rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + res = context.two_set(case_id="ExampleTrace2") + assert isinstance(res, TwoSet) + assert res.tpr == round((0 + 300) / (120 + 421), 4) + assert res.dr == round((120 + 0) / (120 + 421), 4) + assert res.fr == round((0 + 60) / (120 + 421), 4) + assert res.uar == round((0 + 0) / (120 + 421), 4) + assert res.uor == round((0 + 61) / (120 + 421), 4) + assert res.tnr == round((241 + 0) / (421 + 120), 4) + assert res.ir == round((180 + 0) / (421 + 120), 4) + assert res.mr == round((0 + 60) / (421 + 120), 4) + assert res.oar == round((0 + 60) / (421 + 120), 4) + assert res.oor == round((0 + 0) / (421 + 120), 4) + summed = 0.0 + for field in rate_props: + summed += getattr(res, field) + assert abs(summed - 2.0) < 0.001 + + +def test_context_two_set_rates_by_activity_case() -> None: + context = aqudem.Context(os.path.join("tests", "resources", "ground_truth.xes"), + os.path.join("tests", "resources", "detected.xes")) + res = context.two_set() + assert isinstance(res, TwoSet) + assert res.tpr == round((780 + 390 + 421 + 0 + 300) / (960 + 390 + 781 + 120 + 421), 4) + assert res.dr == round((0 + 0 + 240 + 120 + 0) / (960 + 390 + 781 + 120 + 421), 4) + assert res.fr == round((60 + 0 + 0 + 0 + 60) / (960 + 390 + 781 + 120 + 421), 4) + assert res.uar == round((0 + 0 + 120 + 0 + 0) / (960 + 390 + 781 + 120 + 421), 4) + assert res.uor == round((120 + 0 + 0 + 0 + 61) / (960 + 390 + 781 + 120 + 421), 4) + assert res.tnr == round((2281 + 3001 + 2760 + 241 + 0) / (2581 + 3151 + 2760 + 421 + 120), 4) + assert res.ir == round((0 + 0 + 0 + 180 + 0) / (2581 + 3151 + 2760 + 421 + 120), 4) + assert res.mr == round((0 + 30 + 0 + 0 + 60) / (2581 + 3151 + 2760 + 421 + 120), 4) + assert res.oar == round((0 + 120 + 0 + 60 + 0) / (2581 + 3151 + 2760 + 421 + 120), 4) + assert res.oor == round((300 + 0 + 0 + 0 + 0) / (2581 + 3151 + 2760 + 421 + 120), 4) + summed = 0.0 + for field in rate_props: + summed += getattr(res, 
field) + assert abs(summed - 2.0) < 0.001 diff --git a/package/tests/test_utils.py b/package/tests/test_utils.py new file mode 100644 index 0000000..46aedbe --- /dev/null +++ b/package/tests/test_utils.py @@ -0,0 +1,55 @@ +"""This module contains tests for the aqudem utils module.""" +import os +import pytest +import aqudem + + +def test_xes_check_missing_sampling_freq() -> None: + """Test the XESMissingSamplingFreqError exception.""" + with pytest.raises(aqudem.utils.XESMissingSamplingFreqError): + aqudem.Context(os.path.join("tests", "resources", "ground_truth_missingsamplingfreq.xes"), + os.path.join("tests", "resources", "detected_missingsamplingfreq.xes")) + + +def test_xes_missing_lifecycle_transition() -> None: + """Test the XESIncorrectLifecycleTransitionError exception.""" + with pytest.raises(aqudem.utils.XESIncorrectLifecycleTransitionError): + aqudem.Context(os.path.join("tests", "resources", "ground_truth_missinglifecycle.xes"), + os.path.join("tests", "resources", "detected_missinglifecycle.xes")) + + +def test_xes_wrong_lifecycle_transition() -> None: + """Test the XESIncorrectLifecycleTransitionError exception.""" + with pytest.raises(aqudem.utils.XESIncorrectLifecycleTransitionError): + aqudem.Context(os.path.join("tests", "resources", "ground_truth_wronglifecycle.xes"), + os.path.join("tests", "resources", "detected_wronglifecycle.xes")) + + +def test_xes_toomanycomplete_lifecycle_transition() -> None: + """Test the XESIncorrectLifecycleTransitionError exception.""" + with pytest.raises(aqudem.utils.XESIncorrectLifecycleTransitionError): + aqudem.Context(os.path.join("tests", + "resources", + "ground_truth_toomanycompletelifecycle.xes"), + os.path.join("tests", "resources", "detected_toomanycompletelifecycle.xes")) + + +def test_xes_missing_timestamp() -> None: + """Test the XESMissingTimestamp exception.""" + with pytest.raises(aqudem.utils.XESMissingTimestamp): + aqudem.Context(os.path.join("tests", "resources", 
"ground_truth_missingtimestamp.xes"), + os.path.join("tests", "resources", "detected_missingtimestamp.xes")) + + +def test_xes_missing_activity_name() -> None: + """Test the XESMissingActivityName exception.""" + with pytest.raises(aqudem.utils.XESMissingActivityName): + aqudem.Context(os.path.join("tests", "resources", "ground_truth_missingactivityname.xes"), + os.path.join("tests", "resources", "detected_missingactivityname.xes")) + + +def test_xes_missing_trace_name() -> None: + """Test the XESMissingTraceNameAttribute exception.""" + with pytest.raises(aqudem.utils.XESMissingTraceNameAttribute): + aqudem.Context(os.path.join("tests", "resources", "ground_truth_missingtracename.xes"), + os.path.join("tests", "resources", "detected_missingtracename.xes")) diff --git a/package/tests/test_ward_helper.py b/package/tests/test_ward_helper.py new file mode 100644 index 0000000..637feef --- /dev/null +++ b/package/tests/test_ward_helper.py @@ -0,0 +1,250 @@ +"""This module contains tests for the aqudem ward_helper module.""" +from datetime import datetime +import numpy as np +import static_frame as sf +import pytest +from aqudem import ward_helper +from aqudem.ward_helper import _is_during_activity_exec +from .mocks.logs import detected_ten_eleven, ground_truth_ten_eleven, start_end_series_ten_eleven + + +def test_generate_eight_type_with_valid_input() -> None: + assert ward_helper._generate_eight_type("TP", "FP", "TN") == "Oo" + + +def test_generate_eight_type_with_invalid_segment_type() -> None: + with pytest.raises(ValueError): + ward_helper._generate_eight_type("Invalid", "FP", "TN") + + +def test_generate_eight_type_with_invalid_curr_type() -> None: + with pytest.raises(ValueError): + ward_helper._generate_eight_type("TP", "Invalid", "TN") + + +def test_generate_eight_type_with_invalid_prev_type() -> None: + with pytest.raises(ValueError): + ward_helper._generate_eight_type("NaN", "FP", "TN") + + +def test_generate_eight_type_with_invalid_next_type() -> None: + with 
pytest.raises(ValueError): + ward_helper._generate_eight_type("TP", "FP", "Invalid") + + +def test_generate_segment_scores_with_valid_input() -> None: + ground_truth = sf.FrameHE.from_dict({ + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 12, 0), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 15, 0), + datetime(2021, 1, 1, 16, 0), datetime(2021, 1, 1, 18, 0)] + }, dtypes={"lifecycle:transition": str, "time:timestamp": np.datetime64}) + detected = sf.FrameHE.from_dict({ + "lifecycle:transition": ["start", "complete", "start", "complete", "start", "complete", + "start", "complete", "start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 15), datetime(2021, 1, 1, 11, 0), + datetime(2021, 1, 1, 11, 30), datetime(2021, 1, 1, 12, 30), + datetime(2021, 1, 1, 13, 0), datetime(2021, 1, 1, 14, 0), + datetime(2021, 1, 1, 14, 30), datetime(2021, 1, 1, 15, 30), + datetime(2021, 1, 1, 15, 45), datetime(2021, 1, 1, 17, 0)] + }, dtypes={"lifecycle:transition": str, "time:timestamp": np.datetime64}) + result = ward_helper._generate_segment_scores(ground_truth, + detected, + datetime(2021, 1, 1, 10, 0), + datetime(2021, 1, 1, 18, 0)) + assert len(result) != 0 + assert "start" in result.columns + assert "end" in result.columns + assert "type" in result.columns + + assert result.iloc[0]["type"] == "Ua" + assert result.iloc[0]["start"] == datetime(2021, 1, 1, 10, 0) + assert result.iloc[0]["end"] == datetime(2021, 1, 1, 10, 15) + + assert result.iloc[1]["type"] == "TP" + assert result.iloc[1]["start"] == datetime(2021, 1, 1, 10, 15) + assert result.iloc[1]["end"] == datetime(2021, 1, 1, 11, 0) + + assert result.iloc[2]["type"] == "F" + assert result.iloc[2]["start"] == datetime(2021, 1, 1, 11, 0) + assert result.iloc[2]["end"] == datetime(2021, 1, 1, 11, 30) + + assert result.iloc[3]["type"] == "TP" + assert result.iloc[3]["start"] == datetime(2021, 1, 1, 
11, 30) + assert result.iloc[3]["end"] == datetime(2021, 1, 1, 12, 0) + + assert result.iloc[4]["type"] == "Oo" + assert result.iloc[4]["start"] == datetime(2021, 1, 1, 12, 0) + assert result.iloc[4]["end"] == datetime(2021, 1, 1, 12, 30) + + assert result.iloc[5]["type"] == "TN" + assert result.iloc[5]["start"] == datetime(2021, 1, 1, 12, 30) + assert result.iloc[5]["end"] == datetime(2021, 1, 1, 13, 0) + + assert result.iloc[6]["type"] == "TP" + assert result.iloc[6]["start"] == datetime(2021, 1, 1, 13, 0) + assert result.iloc[6]["end"] == datetime(2021, 1, 1, 14, 0) + + assert result.iloc[7]["type"] == "F" + assert result.iloc[7]["start"] == datetime(2021, 1, 1, 14, 0) + assert result.iloc[7]["end"] == datetime(2021, 1, 1, 14, 30) + + assert result.iloc[8]["type"] == "TP" + assert result.iloc[8]["start"] == datetime(2021, 1, 1, 14, 30) + assert result.iloc[8]["end"] == datetime(2021, 1, 1, 15, 0) + + assert result.iloc[9]["type"] == "Oo" + assert result.iloc[9]["start"] == datetime(2021, 1, 1, 15, 0) + assert result.iloc[9]["end"] == datetime(2021, 1, 1, 15, 30) + + assert result.iloc[10]["type"] == "TN" + assert result.iloc[10]["start"] == datetime(2021, 1, 1, 15, 30) + assert result.iloc[10]["end"] == datetime(2021, 1, 1, 15, 45) + + assert result.iloc[11]["type"] == "Oa" + assert result.iloc[11]["start"] == datetime(2021, 1, 1, 15, 45) + assert result.iloc[11]["end"] == datetime(2021, 1, 1, 16, 0) + + assert result.iloc[12]["type"] == "TP" + assert result.iloc[12]["start"] == datetime(2021, 1, 1, 16, 0) + assert result.iloc[12]["end"] == datetime(2021, 1, 1, 17, 0) + + assert result.iloc[13]["type"] == "Uo" + assert result.iloc[13]["start"] == datetime(2021, 1, 1, 17, 0) + assert result.iloc[13]["end"] == datetime(2021, 1, 1, 18, 0) + + +# pylint: disable=too-many-statements, unsubscriptable-object +def test_generate_segment_scores_with_valid_input_2() -> None: + result = ward_helper._generate_segment_scores(ground_truth_ten_eleven, + detected_ten_eleven, + 
start_end_series_ten_eleven["1"][0], + start_end_series_ten_eleven["1"][1]) + assert len(result) != 0 + assert "start" in result.columns + assert "end" in result.columns + assert "type" in result.columns + + assert result.iloc[0]["type"] == "Oa" + assert result.iloc[0]["start"] == datetime(2021, 1, 1, 10, 0) + assert result.iloc[0]["end"] == datetime(2021, 1, 1, 10, 5) + + assert result.iloc[1]["type"] == "TP" + assert result.iloc[1]["start"] == datetime(2021, 1, 1, 10, 5) + assert result.iloc[1]["end"] == datetime(2021, 1, 1, 10, 10) + + assert result.iloc[2]["type"] == "F" + assert result.iloc[2]["start"] == datetime(2021, 1, 1, 10, 10) + assert result.iloc[2]["end"] == datetime(2021, 1, 1, 10, 15) + + assert result.iloc[3]["type"] == "TP" + assert result.iloc[3]["start"] == datetime(2021, 1, 1, 10, 15) + assert result.iloc[3]["end"] == datetime(2021, 1, 1, 10, 20) + + assert result.iloc[4]["type"] == "Uo" + assert result.iloc[4]["start"] == datetime(2021, 1, 1, 10, 20) + assert result.iloc[4]["end"] == datetime(2021, 1, 1, 10, 25) + + assert result.iloc[5]["type"] == "TN" + assert result.iloc[5]["start"] == datetime(2021, 1, 1, 10, 25) + assert result.iloc[5]["end"] == datetime(2021, 1, 1, 10, 30) + + assert result.iloc[6]["type"] == "Oa" + assert result.iloc[6]["start"] == datetime(2021, 1, 1, 10, 30) + assert result.iloc[6]["end"] == datetime(2021, 1, 1, 10, 35) + + assert result.iloc[7]["type"] == "TP" + assert result.iloc[7]["start"] == datetime(2021, 1, 1, 10, 35) + assert result.iloc[7]["end"] == datetime(2021, 1, 1, 10, 38) + + assert result.iloc[8]["type"] == "Oo" + assert result.iloc[8]["start"] == datetime(2021, 1, 1, 10, 38) + assert result.iloc[8]["end"] == datetime(2021, 1, 1, 10, 40) + + assert result.iloc[9]["type"] == "TN" + assert result.iloc[9]["start"] == datetime(2021, 1, 1, 10, 40) + assert result.iloc[9]["end"] == datetime(2021, 1, 1, 10, 41) + + assert result.iloc[10]["type"] == "I" + assert result.iloc[10]["start"] == datetime(2021, 1, 1, 
10, 41) + assert result.iloc[10]["end"] == datetime(2021, 1, 1, 10, 42) + + assert result.iloc[11]["type"] == "TN" + assert result.iloc[11]["start"] == datetime(2021, 1, 1, 10, 42) + assert result.iloc[11]["end"] == datetime(2021, 1, 1, 10, 45) + + assert result.iloc[12]["type"] == "Ua" + assert result.iloc[12]["start"] == datetime(2021, 1, 1, 10, 45) + assert result.iloc[12]["end"] == datetime(2021, 1, 1, 10, 50) + + assert result.iloc[13]["type"] == "TP" + assert result.iloc[13]["start"] == datetime(2021, 1, 1, 10, 50) + assert result.iloc[13]["end"] == datetime(2021, 1, 1, 10, 55) + + assert result.iloc[14]["type"] == "Oo" + assert result.iloc[14]["start"] == datetime(2021, 1, 1, 10, 55) + assert result.iloc[14]["end"] == datetime(2021, 1, 1, 11, 0) + + +def test_generate_segment_scores_with_empty_input() -> None: + ground_truth = sf.FrameHE.from_dict({ + "lifecycle:transition": [], + "time:timestamp": [] + }) + detected = sf.FrameHE.from_dict({ + "lifecycle:transition": [], + "time:timestamp": [] + }) + result = ward_helper._generate_segment_scores(ground_truth, detected, None, None) + assert len(result) == 0 + + +def test_is_during_activity_exec() -> None: + # Test case where timestamp is during an activity execution + log = sf.FrameHE.from_dict({ + "lifecycle:transition": ["start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 11, 0)] + }) + timestamp = datetime(2021, 1, 1, 10, 30) + assert _is_during_activity_exec(log, timestamp) is True + + # Test case where timestamp is not during an activity execution + log = sf.FrameHE.from_dict({ + "lifecycle:transition": ["start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 11, 0)] + }) + timestamp = datetime(2021, 1, 1, 11, 30) + assert _is_during_activity_exec(log, timestamp) is False + + # Test case where timestamp is exactly at the start of an activity + log = sf.FrameHE.from_dict({ + "lifecycle:transition": ["start", "complete"], + 
"time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 11, 0)] + }) + timestamp = datetime(2021, 1, 1, 10, 0) + assert _is_during_activity_exec(log, timestamp) is True + + # Test case where timestamp is exactly at the end of an activity + log = sf.FrameHE.from_dict({ + "lifecycle:transition": ["start", "complete"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0), datetime(2021, 1, 1, 11, 0)] + }) + timestamp = datetime(2021, 1, 1, 11, 0) + assert _is_during_activity_exec(log, timestamp) is False + + # Test case where log is empty + log = sf.FrameHE.from_dict({ + "lifecycle:transition": [], + "time:timestamp": [] + }) + timestamp = datetime(2021, 1, 1, 10, 0) + assert _is_during_activity_exec(log, timestamp) is False + + # Test case where lifecycle:transition is not in ["start", "complete"] + log = sf.FrameHE.from_dict({ + "lifecycle:transition": ["other"], + "time:timestamp": [datetime(2021, 1, 1, 10, 0)] + }) + timestamp = datetime(2021, 1, 1, 10, 0) + with pytest.raises(ValueError): + _is_during_activity_exec(log, timestamp) diff --git a/package/tox.ini b/package/tox.ini new file mode 100644 index 0000000..d8ae4d7 --- /dev/null +++ b/package/tox.ini @@ -0,0 +1,26 @@ +[tox] +envlist = py36, py37, py38, flake8 + +[travis] +python = + 3.8: py38 + 3.7: py37 + 3.6: py36 + +[testenv:flake8] +basepython = python +deps = flake8 +commands = flake8 aqudem tests + +[testenv] +setenv = + PYTHONPATH = {toxinidir} +deps = + -r{toxinidir}/requirements_dev.txt +; If you want to make tox run the tests with the same versions, create a +; requirements.txt with the pinned versions and uncomment the following line: +; -r{toxinidir}/requirements.txt +commands = + pip install -U pip + pytest --basetemp={envtmpdir} +