diff --git a/binder/packedselection.ipynb b/binder/packedselection.ipynb new file mode 100644 index 000000000..b042a15b8 --- /dev/null +++ b/binder/packedselection.ipynb @@ -0,0 +1,1745 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0ef5ce86-6598-46ad-8fd4-c074b13c06d2", + "metadata": {}, + "source": [ + "# PackedSelection in Coffea 2023\n", + "\n", + "In `coffea`, `PackedSelection` is a class that can store several boolean arrays in a memory-efficient manner and evaluate arbitrary combinations of boolean requirements in a CPU-efficient way. Supported inputs include 1D numpy or awkward arrays and it has built-in functionalities to form analysis in signal and control regions, and to implement cutflow or \"N-1\" plots.\n", + "\n", + "Although `coffea` 2023 should be used in delayed mode (using `dask-awkward`), we will first present these functionalities eagerly (like in `coffea` 0.7) to showcase this better. Let's first read a sample file of 40 Drell-Yan events to demonstrate the utilities using our `NanoAODSchema` as our schema." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d144affc-9918-4642-940e-148335eed6b7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/iason/fun/coffea_dev/coffea/binder/coffea/nanoevents/schemas/nanoaod.py:215: RuntimeWarning: Missing cross-reference index for FatJet_genJetAK8Idx => GenJetAK8\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "
[{FsrPhoton: [], Electron: [], SoftActivityJetHT5: 63.5, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 64, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [Electron, Electron], SoftActivityJetHT5: 130, ...},\n",
+       " {FsrPhoton: [], Electron: [Electron, Electron], SoftActivityJetHT5: 25.8, ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 172, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 54.4, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 96.2, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 19, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 9.36, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 115, RawMET: ..., ...},\n",
+       " ...,\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 49.6, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 14.7, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 22.1, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 33.9, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 16.2, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 28.4, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 16.1, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 28.5, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 7, RawMET: {...}, ...}]\n",
+       "--------------------------------------------------------------------------------\n",
+       "type: 40 * event
" + ], + "text/plain": [ + ", ...] type='40 * event'>" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import awkward as ak\n", + "import numpy as np\n", + "from coffea.nanoevents import NanoEventsFactory, NanoAODSchema\n", + "from matplotlib import pyplot as plt\n", + "\n", + "\n", + "events = NanoEventsFactory.from_root(\n", + " {\"../tests/samples/nano_dy.root\": \"Events\"},\n", + " metadata={\"dataset\": \"nano_dy\"},\n", + " schemaclass=NanoAODSchema,\n", + " permit_dask=False,\n", + ").events()\n", + "\n", + "events" + ] + }, + { + "cell_type": "markdown", + "id": "919582c0-9dc0-40d0-8e25-076dc4a848cd", + "metadata": {}, + "source": [ + "Now let's import `PackedSelection`, and create an instance of it." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e4c80e4f-f4bd-4a92-b5fe-4c9faaf22bf7", + "metadata": {}, + "outputs": [], + "source": [ + "from coffea.analysis_tools import PackedSelection\n", + "\n", + "selection = PackedSelection()" + ] + }, + { + "cell_type": "markdown", + "id": "18124df0-d370-4c3e-9e25-a9500eb1daf1", + "metadata": {}, + "source": [ + "We can create a boolean mask and add this to our selection by using the `add` method. This adds the following \"cut\" to our selection and names it \"twoElectron\"." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b3d0e60b-48b6-4c8b-bb1a-7d8e295cd23b", + "metadata": {}, + "outputs": [], + "source": [ + "selection.add(\"twoElectron\", ak.num(events.Electron) == 2)" + ] + }, + { + "cell_type": "markdown", + "id": "324c3acb-eb43-440a-a23e-3e31dded970a", + "metadata": {}, + "source": [ + "We've added one \"cut\" to our selection. Now let's add a couple more." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e45733b-36dc-43b5-94d2-6cc9f974e830", + "metadata": {}, + "outputs": [], + "source": [ + "selection.add(\"eleOppSign\", ak.sum(events.Electron.charge, axis=1) == 0)\n", + "selection.add(\"noElectron\", ak.num(events.Electron) == 0)" + ] + }, + { + "cell_type": "markdown", + "id": "71edfec0-537d-42a7-86df-f96423f69371", + "metadata": {}, + "source": [ + "To avoid calling `add` repeatedly, we can use the `add_multiple` method, which adds several named selections in a single call." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f16c75b5-71bf-4957-835e-efabda6978cf", + "metadata": {}, + "outputs": [], + "source": [ + "selection.add_multiple(\n", + " {\n", + " \"twoMuon\": ak.num(events.Muon) == 2,\n", + " \"muOppSign\": ak.sum(events.Muon.charge, axis=1) == 0,\n", + " \"noMuon\": ak.num(events.Muon) == 0,\n", + " \"leadPt20\": ak.any(events.Electron.pt >= 20.0, axis=1)\n", + " | ak.any(events.Muon.pt >= 20.0, axis=1),\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d0e968df-afdd-413b-a7ab-29f05f1e25c4", + "metadata": {}, + "source": [ + "By viewing the `PackedSelection` instance, one can see the names of the added selections, whether it is operating in delayed mode or not, the number of added selections and the maximum supported number of selections." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "927d8de4-53b7-4250-b24c-17fa16c16526", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PackedSelection(selections=('twoElectron', 'eleOppSign', 'noElectron', 'twoMuon', 'muOppSign', 'noMuon', 'leadPt20'), delayed_mode=False, items=7, maxitems=32)\n" + ] + } + ], + "source": [ + "print(selection)" + ] + }, + { + "cell_type": "markdown", + "id": "434fa647-9e54-4678-adf5-c75e25cc032e", + "metadata": {}, + "source": [ + "To evaluate a boolean mask (e.g. 
to filter events) we can use the `selection.all(*names)` function, which will compute the logical AND of all listed boolean selections." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0055353a-735b-4ac7-a2f1-698d2d42008b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, True, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selection.all(\"twoElectron\", \"noMuon\", \"leadPt20\")" + ] + }, + { + "cell_type": "markdown", + "id": "b81d7b70-29cd-46b1-8d23-8f538b109c54", + "metadata": {}, + "source": [ + "We can also be more specific and require that a specific set of selections have a given value (with the unspecified ones allowed to be either true or false) using `selection.require`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5e6a478e-e69b-4181-8f8a-398bd6810375", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selection.require(twoElectron=True, noMuon=True, eleOppSign=False)" + ] + }, + { + "cell_type": "markdown", + "id": "52a11d5f-3d39-4146-b38d-f783039ccac9", + "metadata": {}, + "source": [ + "There are also the `allfalse` and `any` methods: `allfalse` requires every listed boolean selection to be false, while `any` is the logical OR of all listed boolean selections." + ] + }, + { + "cell_type": "markdown", + "id": "9e36a33b-2fcc-4380-8a0c-9c3402315e61", + "metadata": {}, + "source": [ + "Using `PackedSelection`, we are now able to perform an N-1 style selection using the `nminusone(*names)` method. This will perform an N-1 style selection by using as \"N\" the provided names and will exclude each named cut one at a time in order. In the end it will also perform a selection using all N cuts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "77bc1bdc-3449-425a-8853-e2622fc4ed94", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NminusOne(selections=('twoElectron', 'noMuon', 'leadPt20'))" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nminusone = selection.nminusone(\"twoElectron\", \"noMuon\", \"leadPt20\")\n", + "nminusone" + ] + }, + { + "cell_type": "markdown", + "id": "164c7fb8-f582-4892-8e78-a54e47820721", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "This returns an `NminusOne` object which has the following methods: `result()`, `print()`, `yieldhist()`, `to_npz()` and `plot_vars()`" + ] + }, + { + "cell_type": "markdown", + "id": "7945c67d-5488-426c-b81c-56e2d09cee96", + "metadata": {}, + "source": [ + "Let's look at the results of the N-1 selection." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "eef67293-1f22-4ea0-ad7a-ac95a424a14c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ('labels', 'nev', 'masks')\n" + ] + } + ], + "source": [ + "res = nminusone.result()\n", + "print(type(res), res._fields)" + ] + }, + { + "cell_type": "markdown", + "id": "7cdcfac7-ec59-447d-a285-609332042306", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "This is just a `namedtuple` with the attributes `labels`, `nev` and `masks`. 
So we can say:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5ee9d9e6-b496-4cc3-9d36-578ce096ff57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(['initial', 'N - twoElectron', 'N - noMuon', 'N - leadPt20', 'N'],\n", + " [40, 10, 3, 5, 3],\n", + " [array([False, True, True, False, False, False, False, False, False,\n", + " True, False, False, False, False, False, True, True, False,\n", + " False, False, True, True, False, False, False, False, False,\n", + " True, False, True, False, False, False, True, False, False,\n", + " False, False, False, False]),\n", + " array([False, False, True, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False]),\n", + " array([False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " True, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False]),\n", + " array([False, False, True, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False])])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels, nev, masks = res\n", + "labels, nev, masks" + ] + }, + { + "cell_type": "markdown", + "id": "e321f2b4-3e35-423c-a0a2-7b878ae7a260", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "`labels` is a list of labels of each mask that is applied, `nev` is a list 
of the number of events that survive each mask, and `masks` is a list of boolean masks (arrays) of which events survive each selection.\n", + "You can also choose to print the statistics of your N-1 selection in a similar fashion to `RDataFrame`." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "abe984e8-a4f1-4769-8d7c-60ce565b2d56", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "N-1 selection stats:\n", + "Ignoring twoElectron : pass = 10 all = 40 -- eff = 25.0 %\n", + "Ignoring noMuon : pass = 3 all = 40 -- eff = 7.5 %\n", + "Ignoring leadPt20 : pass = 5 all = 40 -- eff = 12.5 %\n", + "All cuts : pass = 3 all = 40 -- eff = 7.5 %\n" + ] + } + ], + "source": [ + "nminusone.print()" + ] + }, + { + "cell_type": "markdown", + "id": "282ebb23-70f8-43e1-bb60-99770fcf3543", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Or get a histogram of your total event yields. This returns a `hist.Hist` object together with the list of bin labels, and we can plot the histogram using its `mplhep` plotting backend." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ef6b795a-3205-4850-a366-791bf625f094", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "h, labels = nminusone.yieldhist()\n", + "h.plot1d()\n", + "plt.xticks(plt.gca().get_xticks(), labels, rotation=45)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "0080510c-aa7c-4492-b1a1-28601d4e7a27", + "metadata": {}, + "source": [ + "You can also save the results of the N-1 selection to a `.npz` file for later use." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d6bb3514-403c-441a-bf85-d88304e556c4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "labels: ['initial' 'N - twoElectron' 'N - noMuon' 'N - leadPt20' 'N']\n", + "nev: [40 10 3 5 3]\n", + "masks: [[False True True False False False False False False True False False\n", + " False False False True True False False False True True False False\n", + " False False False True False True False False False True False False\n", + " False False False False]\n", + " [False False True False False False False False False False False False\n", + " False False False False False False False False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]\n", + " [False False True True False False False False False False False False\n", + " False False False False False False True False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]\n", + " [False False True False False False False False False False False False\n", + " False False False False False False False False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]]\n" + ] + } + ], + "source": [ + "nminusone.to_npz(\"nminusone_results.npz\")\n", + "\n", + "with np.load(\"nminusone_results.npz\") as f:\n", + " for i in f.files:\n", + " 
print(f\"{i}: {f[i]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "abfab631-e4bd-40b7-a3e8-faff7c87ed76", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Finally, we can ask this object to create histograms of different variables, masking them with our N-1 selection.\n", + "What it will output is a list of histograms, one for each requested variable, where the x-axis is the distribution of the variable, and the y-axis is the mask that was applied.\n", + "It is essentially slices of how the variable distribution evolves as each N-1 or N selection is applied. It does also return a list of labels of the masks to keep track.\n", + "\n", + "Note that the variables are passed in as a dictionary of `name: array` pairs and that the arrays will of course be flattened to be histogrammed." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b21100ec-06e8-4e94-9966-925a512212b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "([Hist(\n", + " Regular(20, 5.81891, 60.0685, name='Ept'),\n", + " Integer(0, 5, name='N-1'),\n", + " storage=Double()) # Sum: 60.0,\n", + " Hist(\n", + " Regular(20, -2.93115, 3.11865, name='Ephi'),\n", + " Integer(0, 5, name='N-1'),\n", + " storage=Double()) # Sum: 60.0],\n", + " ['initial', 'N - twoElectron', 'N - noMuon', 'N - leadPt20', 'N'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hs, labels = nminusone.plot_vars(\n", + " {\"Ept\": events.Electron.pt, \"Ephi\": events.Electron.phi}\n", + ")\n", + "hs, labels" + ] + }, + { + "cell_type": "markdown", + "id": "ef70ef89-f1d8-4692-a2fd-044d20ffd706", + "metadata": {}, + "source": [ + "And we can actually plot those histograms, again using the `mplhep` backend." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d7ba7d28-b862-4f59-97df-27a1df3361f5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for h in hs:\n", + " h.plot2d()\n", + " plt.yticks(plt.gca().get_yticks(), labels, rotation=0)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f78249e8-8ebe-43b3-8dd5-b996db6cfa2d", + "metadata": {}, + "source": [ + "You can slice these histograms to view and plot the 1D histogram at each step of the selection. For example, if we want the $P_T$ of the electrons at the final step (index 4) of the selection, we can do the following." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2cda4d43-4933-4fe1-a19b-1db74a5b62d0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "hs[0][:, 4].plot1d(yerr=0)\n", + "plt.yscale(\"log\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "328c05ef-6ddf-478d-93f2-c260436b4b0e", + "metadata": {}, + "source": [ + "Because this automatic binning doesn't look great, for $P_T$ at least, the user has the ability to customize the axes or pass in their own axes objects." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "10c5faad-e4a0-458c-9097-26acee2bba12", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method plot_vars in module coffea.analysis_tools:\n", + "\n", + "plot_vars(vars, axes=None, bins=None, start=None, stop=None, edges=None, transform=None) method of coffea.analysis_tools.NminusOne instance\n", + " Plot the histograms of variables for each step of the N-1 selection\n", + " \n", + " Parameters\n", + " ----------\n", + " vars : dict\n", + " A dictionary in the form ``{name: array}`` where ``name`` is the name of the variable,\n", + " and ``array`` is the corresponding array of values.\n", + " The arrays must be the same length as each mask of the N-1 selection.\n", + " axes : list of hist.axis objects, optional\n", + " The axes objects to histogram the variables on. This will override all the following arguments that define axes.\n", + " Must be the same length as ``vars``.\n", + " bins : iterable of integers or Nones, optional\n", + " The number of bins for each variable histogram. If not specified, it defaults to 20.\n", + " Must be the same length as ``vars``.\n", + " start : iterable of floats or integers or Nones, optional\n", + " The lower edge of the first bin for each variable histogram. 
If not specified, it defaults to the minimum value of the variable array.\n", + " Must be the same length as ``vars``.\n", + " stop : iterable of floats or integers or Nones, optional\n", + " The upper edge of the last bin for each variable histogram. If not specified, it defaults to the maximum value of the variable array.\n", + " Must be the same length as ``vars``.\n", + " edges : list of iterables of floats or integers, optional\n", + " The bin edges for each variable histogram. This overrides ``bins``, ``start``, and ``stop`` if specified.\n", + " Must be the same length as ``vars``.\n", + " transform : iterable of hist.axis.transform objects or Nones, optional\n", + " The transforms to apply to each variable histogram axis. If not specified, it defaults to None.\n", + " Must be the same length as ``vars``.\n", + " \n", + " Returns\n", + " -------\n", + " hists : list of hist.Hist or hist.dask.Hist objects\n", + " A list of 2D histograms of the variables for each step of the N-1 selection.\n", + " The first axis is the variable, the second axis is the N-1 selection step.\n", + " labels : list of strings\n", + " The bin labels of y axis of the histogram.\n", + "\n" + ] + } + ], + "source": [ + "help(nminusone.plot_vars)" + ] + }, + { + "cell_type": "markdown", + "id": "5c3cd75f-858b-41a3-8b0b-060361a51eba", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Cutflow is implemented in a similar manner to the N-1 selection. 
We just have to use the `cutflow(*names)` function which will return a `Cutflow` object." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a856a24d-487d-4308-8236-4f89bb5917ca", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Cutflow(selections=('noMuon', 'twoElectron', 'leadPt20'))" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cutflow = selection.cutflow(\"noMuon\", \"twoElectron\", \"leadPt20\")\n", + "cutflow" + ] + }, + { + "cell_type": "markdown", + "id": "ec84e2a3-b2ab-4352-b716-d568814f3300", + "metadata": {}, + "source": [ + "The methods of this object are similar to those of the `NminusOne` object. The only difference is that now we separate the results into either \"onecut\" or \"cutflow\". \"onecut\" represents results where each cut is applied alone, while \"cutflow\" represents results where the cuts are applied cumulatively in order." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "96692df8-d25b-4e57-9eea-59acdc1a7f78", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ('labels', 'nevonecut', 'nevcutflow', 'masksonecut', 'maskscutflow')\n" + ] + }, + { + "data": { + "text/plain": [ + "(['initial', 'noMuon', 'twoElectron', 'leadPt20'],\n", + " [40, 28, 5, 17],\n", + " [40, 28, 5, 3],\n", + " [array([ True, True, True, True, False, False, False, True, True,\n", + " True, False, True, True, True, False, True, True, True,\n", + " True, True, True, True, True, False, False, True, False,\n", + " True, False, True, False, False, True, True, False, True,\n", + " True, True, True, True]),\n", + " array([False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " True, False, True, True, False, False, False, False, False,\n", + " False, False, 
False, False, False, False, False, False, False,\n", + " False, False, False, False]),\n", + " array([False, True, True, False, True, True, True, False, False,\n", + " True, False, False, False, False, False, True, True, False,\n", + " False, False, True, True, False, True, True, False, True,\n", + " True, False, True, False, True, False, True, False, False,\n", + " False, False, False, False])],\n", + " [array([ True, True, True, True, False, False, False, True, True,\n", + " True, False, True, True, True, False, True, True, True,\n", + " True, True, True, True, True, False, False, True, False,\n", + " True, False, True, False, False, True, True, False, True,\n", + " True, True, True, True]),\n", + " array([False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " True, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False]),\n", + " array([False, False, True, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, True, True, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False])])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = cutflow.result()\n", + "print(type(res), res._fields)\n", + "labels, nevonecut, nevcutflow, masksonecut, maskscutflow = res\n", + "labels, nevonecut, nevcutflow, masksonecut, maskscutflow" + ] + }, + { + "cell_type": "markdown", + "id": "e2a06c5f-23d0-4ad0-ab36-dfec7796b8d4", + "metadata": {}, + "source": [ + "As you can see, again we have the same `labels`, `nev` and `masks` only now we have two \"versions\" of them since they've been split into \"onecut\" and \"cutflow\".\n", + "You can again print the statistics of 
the cutflow exactly like `RDataFrame`." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4db657a5-f16f-4fef-9956-649269e4ae2c", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cutflow stats:\n", + "Cut noMuon : pass = 28 cumulative pass = 28 all = 40 -- eff = 70.0 % -- cumulative eff = 70.0 %\n", + "Cut twoElectron : pass = 5 cumulative pass = 5 all = 40 -- eff = 12.5 % -- cumulative eff = 12.5 %\n", + "Cut leadPt20 : pass = 17 cumulative pass = 3 all = 40 -- eff = 42.5 % -- cumulative eff = 7.5 %\n" + ] + } + ], + "source": [ + "cutflow.print()" + ] + }, + { + "cell_type": "markdown", + "id": "e6aa8edf-3953-410d-b46f-f7d54adda821", + "metadata": {}, + "source": [ + "Again, you can extract yield hists, only now there are two of them." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e9e3db1b-8836-4a85-8e56-5819134b7343", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAh8AAAHnCAYAAAAYdD84AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA/UUlEQVR4nO3deVxU9f7H8fegbIrgkooKuC+5oOKC5JKpiWg+XCizzKVMK1Fzy+KWmak/upZLCy6VpTc121yyTENzy9TcyEzDtcQCTBNQEUT4/v7wMjfKFhTOMPB6Ph48rnPOYeZDc4EXZ86cYzPGGAEAAFjExdEDAACA4oX4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGCpko4e4Peys7P1888/q0yZMrLZbI4eBwAA/APGGF24cEFVq1aVi8tf79sodPHx888/y9/f39FjAACAGxAfHy8/P7+/3KbQxUeZMmUkXRve29vbwdMAAIB/IjU1Vf7+/vbf43+l0MVHzkst3t7exAcAAE7mnxwywQGnAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsNRNxccLL7wgm82mMWPG2Jelp6crIiJCFSpUkJeXl8LDw5WUlHSzcwIAgCLihuNj9+7dWrBggQIDA3MtHzt2rNasWaMPPvhAW7Zs0c8//6y+ffve9KAAAKBouKH4uHjxogYMGKA33nhD5cqVsy9PSUnRwoULNWvWLHXq1EktWrTQ22+/ra+++ko7d+7Mt6EBAIDzuqGr2kZERKhHjx7q0qWLpk2bZl++d+9eZWZmqkuXLvZlDRo0UEBAgHbs2KE2bdrc/MQ3yBijy5lZDnt85C9P1xL/6MqJAIDCJ8/xsXz5cu3bt0+7d+/+w7rExES5ubmpbNmyuZZXrlxZiYmJ172/jIwMZWRk2G+npqbmdaR/5HJmlho+u75A7hvWO/R8qEq53VA7AwAcLE8vu8THx+vxxx/X0qVL5eHhkS8DREVFycfHx/7h7++fL/cLAAAKpzz96bh3716dOXNGQUFB9mVZWVnaunWrXnvtNa1fv15XrlxRcnJyrr0fSUlJ8vX1ve59RkZGaty4cfbbqampBR4ge57polJuJQr0MZD/0q5kqeW0DY4eAwBwk/IUH507d9a3336ba9mDDz6oBg0a6Mknn5S/v79cXV21ceNGhYeHS5Li4uJ06tQphYSEXPc+3d3d5e7ufoPj35hSbiXYZQ8AgIPk6TdwmTJl1Lhx41zLSpcurQoVKtiXDx06VOPGjVP58uXl7e2tUaNGKSQkxKEHmwIAgMIj3//8nz17tlxcXBQeHq6MjAyFhoZq7ty5+f0wAADASd10fGzevDnXbQ8PD0VHRys6Ovpm7xoAABRBXNsFAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWCpP8TFv3jwFBgbK29tb3t7eCgkJ0WeffWZf37FjR9lstlwfjz76aL4PDQAAnFfJvGzs5+enF154QXXr1pUxRosXL1avXr20f/9+NWrUSJI0bNgwPf/88/bPKVWqVP5ODAAAnFqe4qNnz565bk+fPl3z5s3Tzp077fFRqlQp+fr65t+EAACgSLnhYz6ysrK0fPlyXbp0SSEhIfblS5cu1S233KLGjRsrMjJSaWlp+TIoAAAoGvK050OSvv32W4WEhCg9PV1eXl5auXKlGjZsKEm6//77Vb16dVWtWlUHDhzQk08+qbi4OK1YseJ
P7y8jI0MZGRn226mpqTfwZQAAAGeR5/ioX7++YmNjlZKSog8//FCDBw/Wli1b1LBhQw0fPty+XZMmTVSlShV17txZx48fV+3ata97f1FRUZoyZcqNfwUAAMCp5PllFzc3N9WpU0ctWrRQVFSUmjZtqpdffvm62wYHB0uSjh079qf3FxkZqZSUFPtHfHx8XkcCAABOJM97Pn4vOzs718smvxUbGytJqlKlyp9+vru7u9zd3W92DAAA4CTyFB+RkZEKCwtTQECALly4oGXLlmnz5s1av369jh8/rmXLlql79+6qUKGCDhw4oLFjx6pDhw4KDAwsqPkBAICTyVN8nDlzRoMGDVJCQoJ8fHwUGBio9evX684771R8fLw2bNigOXPm6NKlS/L391d4eLieeeaZgpodAAA4oTzFx8KFC/90nb+/v7Zs2XLTAwEAgKKNa7sAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAAS+UpPubNm6fAwEB5e3vL29tbISEh+uyzz+zr09PTFRERoQoVKsjLy0vh4eFKSkrK96EBAIDzylN8+Pn56YUXXtDevXu1Z88ederUSb169dJ3330nSRo7dqzWrFmjDz74QFu2bNHPP/+svn37FsjgAADAOZXMy8Y9e/bMdXv69OmaN2+edu7cKT8/Py1cuFDLli1Tp06dJElvv/22br31Vu3cuVNt2rTJv6kBAIDTuuFjPrKysrR8+XJdunRJISEh2rt3rzIzM9WlSxf7Ng0aNFBAQIB27NiRL8MCAADnl6c9H5L07bffKiQkROnp6fLy8tLKlSvVsGFDxcbGys3NTWXLls21feXKlZWYmPin95eRkaGMjAz77dTU1LyOBAAAnEie93zUr19fsbGx2rVrlx577DENHjxYhw4duuEBoqKi5OPjY//w9/e/4fsCAACFX57jw83NTXXq1FGLFi0UFRWlpk2b6uWXX5avr6+uXLmi5OTkXNsnJSXJ19f3T+8vMjJSKSkp9o/4+Pg8fxEAAMB53PR5PrKzs5WRkaEWLVrI1dVVGzdutK+Li4vTqVOnFBIS8qef7+7ubn/rbs4HAAAouvJ0zEdkZKTCwsIUEBCgCxcuaNmyZdq8ebPWr18vHx8fDR06VOPGjVP58uXl7e2tUaNGKSQkhHe6AAAAuzzFx5kzZzRo0CAlJCTIx8dHgYGBWr9+ve68805J0uzZs+Xi4qLw8HBlZGQoNDRUc+fOLZDBAQCAc8pTfCxcuPAv13t4eCg6OlrR0dE3NRQAACi6uLYLAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsFSeznAKFBZpV7IcPQJugqdrCdlsNkePAcBBiA84pZbTNjh6BNyEQ8+HqpQbP36A4oqXXQAAgKX40wNOw9O1hA49H+roMXCD0q5ksccKgCTiA07EZrOxqx4AigBedgEAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgqTzFR1RUlFq1aqUyZcqoUqVK6t27t+Li4nJt07FjR9lstlwfjz76aL4ODQAAnFee4mPLli2KiIjQzp07FRMTo8zMTHXt2lWXLl3Ktd2wYcOUkJBg/5gxY0a+Dg0AAJxXybxsvG7duly3Fy1apEqVKmnv3r3q0KGDfXmpUqXk6+ubPxM
CAIAi5aaO+UhJSZEklS9fPtfypUuX6pZbblHjxo0VGRmptLS0m3kYAABQhORpz8dvZWdna8yYMWrbtq0aN25sX37//ferevXqqlq1qg4cOKAnn3xScXFxWrFixXXvJyMjQxkZGfbbqampNzoSAABwAjccHxERETp48KC+/PLLXMuHDx9u/3eTJk1UpUoVde7cWcePH1ft2rX/cD9RUVGaMmXKjY4BAACczA297DJy5Eh98skn2rRpk/z8/P5y2+DgYEnSsWPHrrs+MjJSKSkp9o/4+PgbGQkAADiJPO35MMZo1KhRWrlypTZv3qyaNWv+7efExsZKkqpUqXLd9e7u7nJ3d8/LGAAAwInlKT4iIiK0bNkyrV69WmXKlFFiYqIkycfHR56enjp+/LiWLVum7t27q0KFCjpw4IDGjh2rDh06KDAwsEC+AAAA4FzyFB/z5s2TdO1EYr/19ttva8iQIXJzc9OGDRs0Z84cXbp0Sf7+/goPD9czzzyTbwMDAADnlueXXf6Kv7+/tmzZclMDAQCAoo1ruwAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABL5Sk+oqKi1KpVK5UpU0aVKlVS7969FRcXl2ub9PR0RUREqEKFCvLy8lJ4eLiSkpLydWgAAOC88hQfW7ZsUUREhHbu3KmYmBhlZmaqa9euunTpkn2bsWPHas2aNfrggw+0ZcsW/fzzz+rbt2++Dw4AAJxTybxsvG7duly3Fy1apEqVKmnv3r3q0KGDUlJStHDhQi1btkydOnWSJL399tu69dZbtXPnTrVp0yb/JgcAAE7ppo75SElJkSSVL19ekrR3715lZmaqS5cu9m0aNGiggIAA7dix42YeCgAAFBF52vPxW9nZ2RozZozatm2rxo0bS5ISExPl5uamsmXL5tq2cuXKSkxMvO79ZGRkKCMjw347NTX1RkcCAABO4Ib3fEREROjgwYNavnz5TQ0QFRUlHx8f+4e/v/9N3R8AACjcbig+Ro4cqU8++USbNm2Sn5+ffbmvr6+uXLmi5OTkXNsnJSXJ19f3uvcVGRmplJQU+0d8fPyNjAQAAJxEnuLDGKORI0dq5cqV+uKLL1SzZs1c61u0aCFXV1dt3LjRviwuLk6nTp1SSEjIde/T3d1d3t7euT4AAEDRladjPiIiIrRs2TKtXr1aZcqUsR/H4ePjI09PT/n4+Gjo0KEaN26cypcvL29vb40aNUohISG80wUAAEjKY3zMmzdPktSxY8dcy99++20NGTJEkjR79my5uLgoPDxcGRkZCg0N1dy5c/NlWAAA4PzyFB/GmL/dxsPDQ9HR0YqOjr7hoQAAQNHFtV0AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYKmSjh4AAOAcjDG6nJnl6DGQTzxdS8hmsznksYkPAMA/cjkzSw2fXe/oMZBPDj0fqlJujskAXnYBAACWYs8HACDP9jzTRaXcSjh6DORR2pUstZy2wdFjEB8AgLwr5VbCYbvs4fx42QUAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWynN8bN26VT179lTVqlVls9m0atWqXOuHDBkim82W66Nbt275NS8AAHByeY6PS5cuqWnTpoqOjv7Tbbp166aEhAT7x7vvvnt
TQwIAgKIjz1cFCgsLU1hY2F9u4+7uLl9f3xseCgAAFF0FcszH5s2bValSJdWvX1+PPfaYzp07VxAPAwAAnFC+Xw+5W7du6tu3r2rWrKnjx4/rX//6l8LCwrRjxw6VKFHiD9tnZGQoIyPDfjs1NTW/RwIAAIVIvsdH//797f9u0qSJAgMDVbt2bW3evFmdO3f+w/ZRUVGaMmVKfo8BAAAKqQJ/q22tWrV0yy236NixY9ddHxkZqZSUFPtHfHx8QY8EAAAcKN/3fPze6dOnde7cOVWpUuW6693d3eXu7l7QYwAAgEIiz/Fx8eLFXHsxTp48qdjYWJUvX17ly5fXlClTFB4eLl9fXx0/flwTJ05UnTp1FBoamq+DAwAA55Tn+NizZ4/uuOMO++1x48ZJkgYPHqx58+bpwIEDWrx4sZKTk1W1alV17dpVU6dOZe8GAACQdAPx0bFjRxlj/nT9+vXrb2ogAABQtHFtFwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGCpPMfH1q1b1bNnT1WtWlU2m02rVq3Ktd4Yo2effVZVqlSRp6enunTpoqNHj+bXvAAAwMnlOT4uXbqkpk2bKjo6+rrrZ8yYoVdeeUXz58/Xrl27VLp0aYWGhio9Pf2mhwUAAM6vZF4/ISwsTGFhYdddZ4zRnDlz9Mwzz6hXr16SpP/85z+qXLmyVq1apf79+9/ctAAAwOnl6zEfJ0+eVGJiorp06WJf5uPjo+DgYO3YsSM/HwoAADipPO/5+CuJiYmSpMqVK+daXrlyZfu638vIyFBGRob9dmpqan6OBAAAChmHv9slKipKPj4+9g9/f39HjwQAAApQvsaHr6+vJCkpKSnX8qSkJPu634uMjFRKSor9Iz4+Pj9HAgAAhUy+xkfNmjXl6+urjRs32pelpqZq165dCgkJue7nuLu7y9vbO9cHAAAouvJ8zMfFixd17Ngx++2TJ08qNjZW5cuXV0BAgMaMGaNp06apbt26qlmzpiZNmqSqVauqd+/e+Tk3AABwUnmOjz179uiOO+6w3x43bpwkafDgwVq0aJEmTpyoS5cuafjw4UpOTla7du20bt06eXh45N/UAADAaeU5Pjp27ChjzJ+ut9lsev755/X888/f1GAAAKBocvi7XQAAQPFCfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsle/x8dxzz8lms+X6aNCgQX4/DAAAcFIlC+JOGzVqpA0bNvzvQUoWyMMAAAAnVCBVULJkSfn6+hbEXQMAACdXIPFx9OhRVa1aVR4eHgoJCVFUVJQCAgIK4qEAOKG0K1mOHgE3gOcN+SXf4yM4OFiLFi1S/fr1lZCQoClTpqh9+/Y6ePCgypQp84ftMzIylJGRYb+dmpqa3yMBKGRaTtvw9xsBKLLyPT7CwsLs/w4MDFRwcLCqV6+u999/X0OHDv3D9lFRUZoyZUp+jwEAAAqpAj8StGzZsqpXr56OHTt23fWRkZEaN26c/XZqaqr8/f0LeiwAFvN0LaFDz4c6egzkE0/XEo4eAU6swOPj4sWLOn78uAYOHHjd9e7u7nJ3dy/
oMQA4mM1mUyk33vkGoADO8zFhwgRt2bJFP/zwg7766iv16dNHJUqU0H333ZffDwUAAJxQvv8Zcvr0ad133306d+6cKlasqHbt2mnnzp2qWLFifj8UAABwQvkeH8uXL8/vuwQAAEUI13YBAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliI+AACApYgPAABgKeIDAABYivgAAACWIj4AAICliA8AAGAp4gMAAFiK+AAAAJYiPgAAgKWIDwAAYCniAwAAWIr4AAAAliqw+IiOjlaNGjXk4eGh4OBgff311wX1UAAAwIkUSHy89957GjdunCZPnqx9+/apadOmCg0N1ZkzZwri4QAAgBMpkPiYNWuWhg0bpgcffFANGzbU/PnzVapUKb311lsF8XAAAMCJ5Ht8XLlyRXv37lWXLl3+9yAuLurSpYt27NiR3w8HAACcTMn8vsOzZ88qKytLlStXzrW8cuXK+v777/+wfUZGhjIyMuy3U1JSJEmpqan5OlfalavKzkiz3/dVt3z/0gEAKNQK8ndhzu9tY8zfbuvw38BRUVGaMmXKH5b7+/sX2GNWmVNgdw0AgFMoqN+FFy5ckI+Pz19uk+/xccstt6hEiRJKSkrKtTwpKUm+vr5/2D4yMlLjxo2z387Oztavv/6qChUqyGaz5fd4RVpqaqr8/f0VHx8vb29vR4+DG8Bz6Px4Dp0bz9+NM8bowoULqlq16t9um+/x4ebmphYtWmjjxo3q3bu3pGtBsXHjRo0cOfIP27u7u8vd3T3XsrJly+b3WMWKt7c33zROjufQ+fEcOjeevxvzd3s8chTIyy7jxo3T4MGD1bJlS7Vu3Vpz5szRpUuX9OCDDxbEwwEAACdSIPFx77336pdfftGzzz6rxMRENWvWTOvWrfvDQagAAKD4KbADTkeOHHndl1lQcNzd3TV58uQ/vIwF58Fz6Px4Dp0bz581bOafvCcGAAAgn3BhOQAAYCniAwAAWIr4AAAAliI+AACApYiPYuTMmTOSrp30DQCQ/3gPxz9DfBQTH330kXx9fRUbGysXFxcCBCgC+EVXeOT8TP39ZUH4WXt9vNW2mPjpp580YsQI7dixQzExMWratKmys7Pl4kJ/FkXGGK6NVMTkPKfnz59XVlaWbrnlFkePhP/K+Vl68uRJrVmzRjabTdWqVVPfvn0dPVqhRXwUI4mJiRoxYoQ2b96sTZs2ESBFRM4vpfj4eLm4uKhUqVIqV64cz20RtGrVKk2aNEnZ2dmqX7++oqOjVaVKFUePBUkHDx5Uhw4d1KRJE505c0YJCQnq3LmzZs6cqRo1ajh6vEKH+ChmEhISFBERQYAUMStXrtTo0aNVunRpZWdna/ny5QoKCnL0WMhHe/fuVWhoqCIiIlSpUiW99tpr8vDw0DvvvKPGjRs7erxiLS0tTd27d1ejRo0UHR2tc+fO6ciRI+rfv7+qVaumN998Uw0bNnT0mIUK8VEM/fTTTxo1ahQB4uRy9nicPHlSbdq00aRJk3TLLbdo9erVWr16tVauXKnQ0FBHj4l8cODAAZ08eVKxsbGaPHmyJOnChQtq3769jDFatmyZGjVq5OApi6+rV6+qbdu2evTRR3NdQPXMmTNq06aNatSoobVr18rDw4Oftf/Ff4EiLKcrf/zxR/3www86fPiwJKlatWqKjo7W7bffrjvuuEPffPMNB6E6IZvNps2bN2v//v16+OGHNXLkSPXv319vvfWWBg0apD59+mj9+vWOHhM3Keev6j59+uinn36yLy9Tpoy2bdsmm82mQYMG6ZtvvnHglMWbMUbnz5/XgQMH7MsyMzNVqVIlffHFFzpw4ICefvppSSI8chgUSdnZ2cYYY1atWmUaNWpk6tW
rZypVqmSioqLs6xISEkzv3r1NpUqVzJ49exw5Lm5Aenq6ueuuu4zNZjM9evTItS4tLc0MHz7ceHt7mzVr1jhoQuSXQ4cOmWbNmplmzZqZ06dPG2P+9z2emppqAgICTNu2bU1GRoYjxywWcv67/96bb75p/Pz8zNKlS+3Lcp6Pl19+2TRv3twkJSX96ecXN8RHEfPb/2N/+umnxsvLy7z22mvm+PHjZs6cOcZms5nIyEhz5coVY8y1AOnUqZOpWbOmSU9Pd9TYuEEnTpwwgwcPNt7e3mb//v3GmP/9f+Dy5cvm/vvvN1WqVDEXL1504JTIi99+D//234cPHzZ+fn6mS5cuJikpKdf6CxcumOPHj1s7aDGUlZVljDHmzJkzZv/+/WbTpk32dXFxcWbw4MEmJCTEfPjhh7k+b8mSJaZu3brm/PnzFk5buBEfRcTnn39uUlNT7beTkpJMeHi4+fe//22MMebUqVOmVq1aplOnTsbV1dWMHz/eXL582RhjTGJioomPj3fI3Pjncn7RXL161aSlpdmXJyUlme7du5tbbrnFfPfdd7m2vXz5sklISLB+WNyQnOctJibGjBkzxnTv3t28+eabZu/evcaYawFSrVo106VLF3PmzJlcn4OClRMeBw4cMEFBQaZ+/fqmUqVKJjg42L7NV199Ze655x4TFBRk5s6da4y5thfyqaeeMiEhISY5OdkhsxdGxEcRsGLFCtO+fXv7X0PGGPPrr7+aV1991Zw6dcokJSWZxo0bm4cfftgYY8ykSZOMzWYzo0ePtu8BQeGW8wtm7dq15t577zUtW7Y0Y8aMMZ9//rkxxpizZ8+asLAwc8stt5hDhw7l+hw4l5UrVxoPDw8zZMgQ061bNxMYGGjat29v1q9fb4y5FiA1a9Y0rVq1Mr/88ouDpy1e4uLiTMWKFU1kZKSJjY01W7duNU2aNDEjRoywb7N//34zYcIE4+HhYerWrWtatmxpKlSoYPbt2+fAyQsf4qOIyHkd+NixY/ZdezmVPXv2bHPHHXfY/1KaPXu2ady4salcuTJ/FTuRjz/+2Li5uZkRI0aYp556yjRp0sS0a9fOzJs3zxhzbQ9Ir169jM1mM99//72Dp8WNSEhIMEFBQWbOnDn2ZZs2bTIPPPCA6dChgzlw4IAxxpiDBw+axo0bmx9//NFRoxY7Fy9eNAMGDDCPPPKIfVl2drZ58sknTZcuXXJtm5aWZg4ePGhmzpxpFi9ebI4dO2b1uIVeSUcf8Iqbc/XqVZUsWVLVqlXTkSNHdM8996hXr16aMGGCfHx8ZIzR4cOHZbPZVLFiRUnX3mr7+OOP6/7771epUqUc/BXg7xhjlJycrJdeeknPPfecIiMjJUkjRozQ1KlTtXTpUjVq1Ejt27fXyy+/LE9PT85u6kTMf98yffXqVbm6uiopKSnXicM6duwoY4wiIiL0/fffq0mTJmrUqJH27dsnV1dXB05evLi4uKhcuXJq0KCBfZnNZlPHjh21atUqpaWlqUSJEnJ3d5eHh4caNWrE25//Au/5cUK/fUtsyZLX+vGHH35QvXr11K5dO8XExOjVV19VSkqKbDabQkNDtWnTJj3wwAMKDw/XG2+8odtuu43wcBI2m02lS5dWcnKy/ZdNdna2/P39NXnyZJ05c8b+ltrq1atryZIlqlevniNHRh7YbDatXr1aM2bM0JkzZ1SlShUlJiZK+t/3+h133KFy5crpk08+sX8e4VGwzG9OgZWZmSlPT0899dRTioiIyLU+5zkqVaqU3NzcJEmpqakWT+t8iA8n5OLioqNHj2rEiBGSpA8//FChoaH6+eefFR0dreDgYK1cuVKvvvqqkpOT1bdvXy1cuFA///yzSpYsqa1bt3K2vUIu5wfb1atXJUkXL15UqVKl9OOPP9rXZ2dnq1q1aurYsaP27t1r37ZEiRKOGRp5kvMcHz58WAMHDpSfn59uvfVWhYSEaMqUKfryyy9znROifPnyql27tqP
GLVZy9kb98ssvkq6F3tatW3X8+HFJ14Ljt3sXc55Lm82m8ePHq1evXsrMzLR+cGfioJd7cJPWrVtnbDab6dKli7HZbGbx4sW51j/++OOmRYsWZtq0afZjPy5dusR5AJzIrl27zN13320SExONMcYsX77c2Gw2M3/+/FzbhYeHm0ceeYQDTJ3Q9u3bzfLly8348eNzLQ8PDzfly5c306dPN2+88YYZN26c8fb2NocPH3bQpMXP2bNnTUhIiHnmmWfMqlWrjM1msx/0+1ubN282tWrVMsYYExkZaUqVKmV27Nhh9bhOh/hwIv/617/MV199Zb89ceJEY7PZTPv27e3LfhsXjz/+uAkODjaRkZG8xcsJLViwwDRr1sz079/f/k6m//u//zM2m80MHTrU/Otf/zIjRowwXl5e5uDBgw6eFn/lt2F49epV+/926NDB/kfE7/8wGD9+vGnfvr2pW7eu6dixo/08LrBGQkKC+fe//22qVatm3N3dzbJly4wxxmRmZubabuPGjaZFixZmwoQJxs3Nzf62aPw1XnZxEtnZ2Tp16pRKly5tX1a9enWNHDlS3377rQYNGiRJcnNzU0ZGhiRpzpw5at68ub788kv7Lnk4j2HDhmnkyJGKj4/XqFGjdO7cOUVGRuqjjz7S6dOntXXrVp0+fVrbt2/nwLZCzPx3F/758+clXXtZbPv27Tp9+rRWrlypvn37at++fdq3b599e0l66aWXtGbNGu3atUsff/yxmjVr5qgvoVjIOXbjypUrysrKkq+vrzp27KikpCR5e3vryJEjkq4dZ5eVlWX/vIsXL2rfvn1auHChduzYwQUd/yEuLOeENmzYIHd3d7Vv316S9Mknn2jAgAHq1auX/vOf/9i3i4uLU/369fXLL7/Y3+mCwinnYlPff/+9AgIC7AcDG2P0xhtv6D//+Y/8/Pz06quvqmLFikpNTZW3t7cuX74sT09PB0+Pv5OUlKRBgwapV69e8vX11d13360NGzaoU6dOSklJUe/evXXq1Cl9/PHHatSokT1Ycv4XBSvn+y8uLk4vvviijhw5oubNm6tJkyYKCgrSpk2b9Oabb6pPnz76v//7P0lSVlaWSpQoodOnTysiIkLTpk1TkyZNHPyVOBEH7nXBDerTp49xdXU1X375pTHm2i7dNWvWGB8fH/PAAw+YpKQkM2nSJNO4cWNz7tw5B0+Lv5JzllljjDly5Ihp1aqVeeSRR8ylS5fsyzMzM83LL79sqlSpYoYMGZLrZHJwDvHx8eaRRx4x9erVM+7u7uadd94xxvxvF35KSorp0KGDqV27tv0stbBGzplLY2NjTbly5UyfPn1Mr169TLVq1UxISIg5cuSI+fXXX83UqVPNrbfeap5++mn75y5ZssRs2rQp1/cr/hniwwlduXLF9OvXz1SoUMFs27bNGHMtQNavX2/Kli1r6taty8XinMB3331nGjVqZD9Lac5pmNu1a2cef/zxP/xAa9q0qSlbtqwZPHiw/QcmCr+c4z0++eQT4+rqamrUqGE/MZwxuQOkU6dOpmzZshxYarHvvvvOeHp6munTp9uXrVmzxpQqVcp+iYqEhAQzbdo0c+utt5oBAwaYyMhIY7PZOIHYDSI+CrmcH1znzp2zn6E0Z3mfPn1yBYgx185yuXr1aq7VUshduXLF3HvvvcZms5maNWua1atXG2OuBcjkyZNNcHCwGTNmjP0gxAsXLpghQ4aYGTNm2M9mi8Iv5/s3JSXFfPXVV2b16tVm9OjRJjg42MyaNcu+XU6AnD9/3vTs2dMcPXrUIfMWR8nJyaZVq1amdu3a9ovz5RwUHBISYsaOHWvf9pdffjHz5883bdu2NW3btuUg4JtAfDiBFStWmDZt2pjq1aub8ePH5zqaOidAcl6CgXPIysoyL774omnYsKEZMWKEqV69+h8CpE2bNubee+81mzZtMk888YRp1apVrgBF4ZYTHmvWrDG9evWyf4/++OOP5pFHHjH
BwcG5TqP+3nvvmR9++IG3TFvkt5cgWLBggQkODjZDhw61X4Pl6NGjxtXV1f4SWY7fBiVuHPFRCP32h8/u3btNxYoVzaRJk8z06dNN9erVTZ8+fczGjRvt29xzzz3GZrPx3nInkfOSSUJCgqlatap5/PHHzejRo01AQID5+OOPjTHXAiQ6OtoEBwcbX19fc+utt/IWPie0atUqU6pUKfPcc8/lejv0qVOnzKOPPmratGljRo8ebZ555hljs9nsf3mjYC1btsy0bt3aLF++3L5s4cKFJigoyIwaNcp89tlnJiAgwERERNjX5/xcJg7zB/FRiCxfvjzXa73Hjh0zL774opk6dap92e7du02LFi1Mr169zBdffGFfPnDgQBMXF2fpvMibtLS0PyybPXu2GT16tNm1a5cZNGiQ8ff3N2vWrDHGXNv1m5ycbA4ePMjVS51QQkKCCQwMNC+99FKu5Tm79OPj480zzzxj2rRpY1q0aMFVTy30zTffmDvuuMOEhoaa9957z778zTffNM2aNTNeXl7mnnvusS/Pec6Qf4iPQiI+Pt60a9fOnDp1yhhjzK+//mqqVatmPD09zahRo3Jtu2vXLhMUFGTCw8PNunXrHDEu8ujQoUOmUaNGZvbs2SY2Nta+fP369SYgIMB8//33JiEhwQwePNgEBASYTz/91IHTIj+cOHHC1KpVy+zatcsYc+0v5t//1XzlyhWTlpZmvxI1Cl5OSBw7dsx069bN3HXXXeb999+3r1+yZIlp1KiRGTp0KCfvK0CcZKyQ8PPz0+effy5/f399++23kq5ds6VixYrav3+/YmNj7du2bt1aCxYs0L59+/TOO+8oLS3NQVPjn0hPT9e0adN06NAhzZ49W6+88opCQ0N17Ngxde3aVYMGDdKzzz6rihUrasKECQoNDVW/fv0UExPj6NFxE7Kzs/XTTz/p9OnT9mXmv6dV2rNnj7Zs2aKSJUvK09NTZcuWddCUxU/OeVPc3NzUrl077d+/X7NmzdLq1aslSQMGDNC4ceO0f/9+vfLKK7l+9iL/EB+FiKenp1JTUzVgwABFRESoXr16eu+99/Tjjz/q5ZdftkeJJLVs2VIffvihpk6dytVpCzkPDw8NHTpU/fr1U3p6uvr06aOaNWvq3nvvVd++fZWQkKDk5GSdP39ejRs31ogRI/Tggw+qRo0ajh4d/5C5zrkaa9asqbCwML3xxhvauXOnbDab/UJxb731lubPn28/GzGs4+LiohUrVqhhw4Y6e/asOnfurGPHjmnmzJn64IMPJEkPPfSQRo0apZiYGL399tu6cuWKg6cuejjDaSG0Z88ePfbYYwoMDNRLL72kQ4cO6b777lPnzp01fvx4NW7c2NEjIo+MMfryyy81efJknT9/Xjt27NCxY8f02Wefadq0abpw4YI2b96sDh06SLp2iuecy3OjcDP/PQvpxo0btXHjRn3//ffq2bOnunfvruPHj+uJJ55Q6dKlNXDgQFWsWFFr167VO++8o61bt3JGTAdISkpSly5dNGjQID3xxBOSpG+//VYjR45Udna2JkyYoF69ekmSlixZorZt26pmzZqOHLlIIj4Kqf379+uhhx5SUFCQPUAGDRqk5s2b6/nnn1fDhg0dPSLyKDs7Wzt27ND48eN1+fJlxcTEqFKlSjp06JDS09MVFBTE6bSd1MqVKzVw4EANHz5cqampOnz4sLKysvTVV18pJiZG7733nj744AP5+/vL29tbCxYsUNOmTR09drF08eJFtWrVSmPGjNEjjzxiP7X6d999p/bt26tJkyZ6+OGHNXDgQEePWrQ57GgT/K19+/aZZs2amYceesicP3/ebNq0yTRu3Nj89NNPjh4NfyPnwMJTp06ZH3/8Mdc7kXbu3Gluu+0206BBA5OQkGCMMZyx1An99jkODAy0n7X0zJkzxsfHJ9fJqYwxJjEx0SQlJXGFaQfKysoyZ8+eNUFBQebJJ580xlw7ADXn+69Pnz6
mfPnypn///pzHo4ARH4Xcvn37TMuWLU2/fv1McnLydd+uicIl55fSRx99ZOrVq2dq1aplfHx8zIgRI8yJEyeMMdcCpG3btiYwMNAkJiY6clzkwdKlS+3nYslx+PBhU7duXZOcnGxOnDhh/P39zbBhw+zrv/jiC3P27FmrR4X583NyLFq0yNhsNrN48eJcyx999FHzyiuvcIZoC5R09J4X/LXmzZtr7ty5mjBhgtLS0uTj4+PokfA3bDabtmzZogceeECzZs1SgwYNdP78eQ0fPlyJiYmaPXu2goODNWPGDD366KPq3bu3tm/fLpvNxksuhdipU6f0+uuvyxgjd3d3de3aVZJ09epVVaxYUXFxcerXr5+6deumefPmSZIOHDig999/X2XLllWFChUcOX6xY/77Eua2bdu0bds2/fLLL+rZs6eCg4M1ePBgnThxQkOGDNHevXtVtWpVnT59Wu+//76+/fZbVa1a1dHjF3kc8+Ek0tPT5eHh4egx8A89/fTTio2N1aeffmpfFhsbq86dO2vQoEGaPXu2srOztWfPHlWuXFnVq1d34LT4KznHBEjSxo0bFR0drZSUFE2cOFGhoaGSrr37bN++fRo2bJgWLFhg/9yJEydq69atWr16tSpXruyQ+YujnPBYsWKFhgwZorvuuksnTpxQyZIl1a5dO02aNEmlS5fW0qVLtWDBAl26dEkeHh567bXX1Lx5c0ePXywQH0A+M8Zo6NCh+umnn7R+/XplZ2fr6tWrcnNz05IlSzR+/Hjt3r1bAQEBjh4VfyMnPI4ePaqKFSuqbNmy2rJli2bNmqWLFy9q3Lhx6tGjh3744Qf17NlTXl5eev7553X58mVt2rRJCxcu1JdffqnAwEBHfynFzs6dO9WvXz9NnjxZQ4cO1Y8//qhGjRqpatWq6tatm6ZNmyZvb2+dP39eXl5eSk9PV5kyZRw9drHBeT6Am5TT77/++qvS0tJks9nUs2dPbdmyRRs2bJCLi4tKlrz2CqeXl5cqVKggLy8vR46MfyAnPL755hvVr19fH374oSTp9ttv17hx4+Tl5aVZs2Zp3bp1qlGjht5//30ZY/TYY49p4sSJOnDggLZt20Z4OMjRo0cVGhqqoUOH6uTJk+rUqZPuuece9enTR++++66mTp2qlJQUlStXTq6uroSHxdjzAeSDVatW6aWXXtKZM2d03333KSQkROvWrdP69ev1yiuv6M4775QkRUZGKiYmRjExMSpXrpyDp8af+W14tG3bVmPGjNG0adNybbNhwwa9+uqrunDhgp588kn7SzBHjx6Vt7e3PD095e3t7YjxIenChQuKj49XnTp11KNHD/n5+dlPGFa/fn2lp6drwIABevHFFznWygE44BS4Sfv27dOQIUM0fvx4nTt3Tp9++qmOHDmi1q1bKywsTD169FBQUJBcXV118OBBffHFF4RHIfbb8Ljttts0evToXOGxdu1ade/eXV26dJGrq6tmzpypGTNmKDs7W2FhYapbt64Dpy+eco7xuHr1qjIzM+Xp6akyZcqoYcOGOnz4sE6fPq1JkyZJunaSsaZNmyowMFDDhg0jPByEl12Am3D8+HGtXbtWTzzxhCZNmqQ5c+Zo8uTJOnv2rHbs2KGOHTsqJiZGHTt2VM+ePfX1119zQFsh5+Liovj4eDVv3lxjxoxRVFSU/aW1F154Qffff7++++47Sddeghk/frzKli2rZ555Rl988YUjRy+WcsJj7dq1GjhwoFq0aKGJEydq5cqVkiRXV1fZbDZ99dVXOnv2rBYuXKi0tDSNHTtW/v7+Dp6++GLPB3CDUlNT1b9/f506dUoPPfSQfXnPnj0lSbNnz9bixYs1adIkvfDCC44aEzcgKSlJvr6+2r9/v/2XW1RUlF566SV98MEHatSokX0Pye23367MzEy99dZbql27tqNHL1ZynpuPP/5Y9913n8aPH68ePXpo4cKFWr16tWrWrKkGDRqoY8eOWrhwoeb
OnasrV65o7dq17H10MI75AG7C/v371b9/f1WsWFELFixQo0aN7OvWrl2rp59+Wo0aNdLrr78uT09PdvE6CWOMdu/erX79+qlx48Zq166dZs6cqXfeeUfdunXLtW1iYqJ8fX11+fJleXp6Omji4mPt2rXy8/NTYGCgjDE6e/as7rnnHvXu3VtjxozR5cuXVb16dT3wwAOaOXOmbDab0tPTtWvXLp09e1YtWrTgoo2FAPEB3KQDBw5o8ODBat26tUaPHp0rQD7//HPVr1+f83g4IWOMdu3apaFDh+rw4cNat26dunbtqqysLJUoUUKSNGHCBO3cuVMxMTGEhwWSkpIUEhKijh076oknntCtt96qS5cu6fbbb9fSpUvl5uam9u3bq3v37nr99dclSevWrVPDhg15a3shwzEfwE0KDAzUW2+9pT179mjOnDk6dOiQfV3Xrl0JDyfx+7/DbDabWrdurYULF6p27dp68cUXc4XH5MmTNXfuXM2cOZPwsEjlypX14Ycf6uDBg5o1a5YOHjyoEiVK6PLly9q8ebO6du2qsLAw+xlmf/jhBy1evNh+jA4KD+IDyAfNmzfXm2++qQMHDmjq1Kn6/vvvHT0S8uC3p+KOiorSY489ppiYGP36669q06aNlixZori4OPsp1adPn64ZM2Zo27ZtCg4OdvD0xUtQUJAWLFigffv2ac6cOTp//rxGjBihxx57TPXq1dMbb7xhD8Q33nhDBw8eVOPGjR08NX6Pl12AfLR792498cQTevfdd1WlShVHj4M8+OijjzRo0CC1a9dOFy5c0LfffquBAwfqscceU5MmTfT111/rvvvu008//SQXFxdt27ZNLVq0cPTYxdb+/fv10EMPqWXLlurfv7/WrVun2bNnKyoqSpJ08uRJLVmyRFu3blWzZs0cOyz+gPgA8hnX4XE+J0+eVNeuXTVx4kQNGzZMkvTuu+/qxRdf1G233aZp06bJx8dH27dv19SpU/Xvf/+bX2iFwP79+zVs2DC1bNlS4eHhiouL0/z58+Xl5aVatWrpX//6F3s9CiniA0Cxd+TIEXXt2lVLlixRu3bt7MuXLVumYcOG6fPPP1fbtm2VlZWlzMxM4rIQ2bdvnx555BE1a9ZMzz//vHx9fe3vcOF5Krw45gNAsZPzN1d6erqka6fivnDhgjIzMyVJly9fliTdf//9ql27ttatWydJKlGiBL/QCpmgoCC9/vrr+uabbzR27Fj7Ad/u7u4Ongx/hfgAUOzYbDbt2rVLrVu3liS1aNFCnTt31gMPPKBffvnF/u6VK1euqHTp0qpataojx8XfaN68uaKjo5WYmKjy5ctLEufUKeR42QVAsZLzzpbz58+refPm6tevn2bMmKEffvhBgwcP1pEjR/T666+rRIkS2r59u+bPn69du3apTp06jh4df4OXWpwH8QGgWMiJjrS0NJUqVUqSFB0drRUrVmj69Olq06aNTpw4oSlTpigmJkZeXl4qXbq03nrrLa7HA+Qz4gNAsbFp0yb169dPr732mtq2bSsvLy+FhYWpadOmmj9/vn27o0ePysvLS25ubqpQoYIDJwaKJi4sB6DY2LNnj86dO6dFixZp8+bN6tGjhxYtWqQmTZrozjvvVHh4uCSpTp06HDMAFCD2fAAoknJ+tP3+bZehoaFKTk7WiBEjNHnyZPXo0UOJiYk6c+aMFi1axJVpAQvwbhcARUZ2drb93zabTTabTZs2bdJzzz2nmJgYSddOje7n56eKFSvqyy+/1IkTJxQXF6ft27crJibmD9d4AZD/iA8ARYaLi4vi4+P14YcfSpJWrFihzp07a+fOnRozZoxefPFF+fn5qXr16tqxY4f8/Pz0wQcf6Omnn9Zdd92lO+64g5dbAAvwsguAIiMzM1ODBg3SqVOn1Lp1a7388sv68MMPFRwcrE8//VRjx45Vv379VL58eS1ZskSLFi1SWFiYsrOzlZWVJVdXV0d/CUCxQHwAKFKSk5P
VrVs3ff311xo+fHiud7EcP35cEydOlLu7u5YvX66aNWsqJiZGtWrVcuDEQPHDyy4AipTSpUurdOnSatq0qU6cOKGlS5fa19WuXVsLFy5Uv379dPvttyshIcF+NlMA1mHPB4AiJyMjQ+fPn9fDDz+stLQ0DR06VAMGDLCvz8rKUokSJZSQkKAqVao4cFKgeCI+ABRZJ06c0OjRo5Wenq4hQ4bogQce0NNPP62kpCS9+eabjh4PKLaIDwBF2smTJzV+/HgdPXpUnp6eiouL0+eff67g4GBHjwYUW8QHgCLvp59+0vr163X69Gnde++9ql+/vqNHAoo14gMAAFiKd7sAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsBTxAQAALEV8AAAASxEfAJzWkCFD1Lt3b0ePASCPiA8AAGAp4gPAdWVkZGj06NGqVKmSPDw81K5dO+3evVuStHnzZtlsNm3cuFEtW7ZUqVKldNtttykuLi7XfaxevVpBQUHy8PBQrVq1NGXKFF29etW+Pjk5WY888ogqV64sDw8PNW7cWJ988okk6bnnnlOzZs1y3d+cOXNUo0YN+/rFixdr9erVstlsstls2rx5c4H99wCQf0o6egAAhdPEiRP10UcfafHixapevbpmzJih0NBQHTt2zL7N008/rZkzZ6pixYp69NFH9dBDD2n79u2SpG3btmnQoEF65ZVX1L59ex0/flzDhw+XJE2ePFnZ2dkKCwvThQsXtGTJEtWuXVuHDh1SiRIl/tF8EyZM0OHDh5Wamqq3335bklS+fPl8/q8AoCAQHwD+4NKlS5o3b54WLVqksLAwSdIbb7yhmJgYLVy4UK1atZIkTZ8+Xbfffrsk6amnnlKPHj2Unp4uDw8PTZkyRU899ZQGDx4sSapVq5amTp2qiRMnavLkydqwYYO+/vprHT58WPXq1bNv8095eXnJ09NTGRkZ8vX1zc8vH0ABIz4A/MHx48eVmZmptm3b2pe5urqqdevWOnz4sD0+AgMD7eurVKkiSTpz5owCAgL0zTffaPv27Zo+fbp9m6ysLKWnpystLU2xsbHy8/OzhweA4oP4AHDDXF1d7f+22WySpOzsbEnSxYsXNWXKFPXt2/cPn+fh4SFPT8+/vG8XFxcZY3Ity8zMvNmRARQCxAeAP6hdu7bc3Ny0fft2Va9eXdK1X/y7d+/WmDFj/tF9BAUFKS4uTnXq1Lnu+sDAQJ0+fVpHjhy57t6PihUrKjExUcYYe9jExsbm2sbNzU1ZWVn//AsDUCgQHwD+oHTp0nrsscf0xBNPqHz58goICNCMGTOUlpamoUOH6ptvvvnb+3j22Wd11113KSAgQHfffbdcXFz0zTff6ODBg5o2bZpuv/12dejQQeHh4Zo1a5bq1Kmj77//XjabTd26dVPHjh31yy+/aMaMGbr77ru1bt06ffbZZ/L29rY/Ro0aNbR+/XrFxcWpQoUK8vHxybU3BkDhxFttAVzXCy+8oPDwcA0cOFBBQUE6duyY1q9fr3Llyv2jzw8NDdUnn3yizz//XK1atVKbNm00e/Zs+54USfroo4/UqlUr3XfffWrYsKEmTpxo35Nx6623au7cuYqOjlbTpk319ddfa8KECbkeY9iwYapfv75atmypihUr2t9pA6Bws5nfv6gKAABQgNjzAQAALEV8AAAASxEfAADAUsQHAACwFPEBAAAsRXwAAABLER8AAMBSxAcAALAU8QEAACxFfAAAAEsRHwAAwFLEBwAAsNT/A/T/denVjbUDAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "honecut, hcutflow, labels = cutflow.yieldhist()\n", + "\n", + "honecut.plot1d(yerr=0)\n", + "plt.xticks(plt.gca().get_xticks(), labels, rotation=45)\n", + "plt.show()\n", + "\n", + "hcutflow.plot1d(yerr=0)\n", + "plt.xticks(plt.gca().get_xticks(), labels, rotation=45)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "b7faec21-57c7-477d-9f7b-1a22c06c0754", + "metadata": {}, + "source": [ + "Saving to `.npz` files is again there." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "ecc3be33-da27-4845-bba2-f163729670ce", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "labels: ['initial' 'noMuon' 'twoElectron' 'leadPt20']\n", + "nevonecut: [40 28 5 17]\n", + "nevcutflow: [40 28 5 3]\n", + "masksonecut: [[ True True True True False False False True True True False True\n", + " True True False True True True True True True True True False\n", + " False True False True False True False False True True False True\n", + " True True True True]\n", + " [False False True True False False False False False False False False\n", + " False False False False False False True False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]\n", + " [False True True False True True True False False True False False\n", + " False False False True True False False False True True False True\n", + " True False True True False True False True False True False False\n", + " False False False False]]\n", + "maskscutflow: [[ True True True True False False False True True True False True\n", + " True True False True True True True True True True True False\n", + " False True False True False True False False True True False True\n", + " True True True True]\n", + " [False False True True False False False False False False False 
False\n", + " False False False False False False True False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]\n", + " [False False True False False False False False False False False False\n", + " False False False False False False False False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]]\n" + ] + } + ], + "source": [ + "cutflow.to_npz(\"cutflow_results.npz\")\n", + "\n", + "with np.load(\"cutflow_results.npz\") as f:\n", + " for i in f.files:\n", + " print(f\"{i}: {f[i]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "423a72e7-79f6-4e6c-ab59-b2d709e3f9a0", + "metadata": {}, + "source": [ + "And finally, `plot_vars` is also there with the same axes customizability while now it returns two lists of histograms, one for \"onecut\" and one for \"cutflow\". Those can of course be plotted in a similar fashion." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "22454105-13f9-4f22-9450-9915cab46759", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "([Hist(\n", + " Regular(20, 5.81891, 60.0685, name='ept'),\n", + " Integer(0, 4, name='onecut'),\n", + " storage=Double()) # Sum: 73.0,\n", + " Hist(\n", + " Regular(20, -2.93115, 3.11865, name='ephi'),\n", + " Integer(0, 4, name='onecut'),\n", + " storage=Double()) # Sum: 73.0],\n", + " [Hist(\n", + " Regular(20, 5.81891, 60.0685, name='ept'),\n", + " Integer(0, 4, name='cutflow'),\n", + " storage=Double()) # Sum: 63.0,\n", + " Hist(\n", + " Regular(20, -2.93115, 3.11865, name='ephi'),\n", + " Integer(0, 4, name='cutflow'),\n", + " storage=Double()) # Sum: 63.0],\n", + " ['initial', 'noMuon', 'twoElectron', 'leadPt20'])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "h1, h2, labels = 
cutflow.plot_vars(\n", + " {\"ept\": events.Electron.pt, \"ephi\": events.Electron.phi}\n", + ")\n", + "h1, h2, labels" + ] + }, + { + "cell_type": "markdown", + "id": "a96edc79-1b3b-4ff9-8459-6ef28a99d629", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Now, in `coffea` 2023, everything happens in a delayed fashion. Therefore, `PackedSelection` can also operate in delayed or lazy mode and fully support `dask_awkward` arrays. Use is still the same, but everything now is\n", + "a delayed `dask` type object which can be computed whenever the user wants to. This can be done by either calling `.compute()` on the object or `dask.compute(*things)`.\n", + "\n", + "PackedSelection can be initialized to operate in delayed mode by adding a delayed `dask_awkward` array for the first time instead of a materialized `numpy` or `awkward` one.\n", + "I would like to note that we only support delayed `dask_awkward` arrays and not `dask.array` arrays. Please convert your `dask` arrays to `dask_awkward` via `dask_awkward.from_dask_array(array)`. I would also like to note that you cannot mix materialized and delayed arrays in the same `PackedSelection`. Let's now read the same events using dask and perform the exact same things." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "1adf0374-eee9-471b-9582-9cbbf06d2dda", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/iason/fun/coffea_dev/coffea/binder/coffea/nanoevents/schemas/nanoaod.py:215: RuntimeWarning: Missing cross-reference index for FatJet_genJetAK8Idx => GenJetAK8\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "dask.awkward" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dask\n", + "import dask_awkward as dak\n", + "\n", + "dakevents = NanoEventsFactory.from_root(\n", + " {\"../tests/samples/nano_dy.root\": \"Events\"},\n", + " metadata={\"dataset\": \"nano_dy\"},\n", + " schemaclass=NanoAODSchema,\n", + " permit_dask=True,\n", + ").events()\n", + "\n", + "dakevents" + ] + }, + { + "cell_type": "markdown", + "id": "8cb3c5fc-8897-4860-8fb0-924655e81ef7", + "metadata": {}, + "source": [ + "Now `dakevents` is a delayed `dask_awkward` version of our events and if we compute it we get our normal events." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a54d1fcc-13c6-4919-bbac-3d6c299f65ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[{FsrPhoton: [], Electron: [], SoftActivityJetHT5: 63.5, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 64, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [Electron, Electron], SoftActivityJetHT5: 130, ...},\n",
+       " {FsrPhoton: [], Electron: [Electron, Electron], SoftActivityJetHT5: 25.8, ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 172, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 54.4, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 96.2, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 19, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 9.36, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 115, RawMET: ..., ...},\n",
+       " ...,\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 49.6, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 14.7, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 22.1, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 33.9, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 16.2, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 28.4, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [{...}], SoftActivityJetHT5: 16.1, RawMET: ..., ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 28.5, RawMET: {...}, ...},\n",
+       " {FsrPhoton: [], Electron: [], SoftActivityJetHT5: 7, RawMET: {...}, ...}]\n",
+       "--------------------------------------------------------------------------------\n",
+       "type: 40 * event
" + ], + "text/plain": [ + ", ...] type='40 * event'>" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dakevents.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "bd16d282-ace9-4d86-9079-dd66a635e508", + "metadata": {}, + "source": [ + "Now we have to use `dask_awkward` instead of `awkward` and `dakevents` instead of `events` to do the same things. Let's add the same (now delayed) arrays to PackedSelection." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "dc25b728-7504-44fb-b920-051dab6c99d1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PackedSelection(selections=('twoElectron', 'eleOppSign', 'noElectron', 'twoMuon', 'muOppSign', 'noMuon', 'leadPt20'), delayed_mode=True, items=7, maxitems=32)\n" + ] + } + ], + "source": [ + "selection = PackedSelection()\n", + "\n", + "selection.add_multiple(\n", + " {\n", + " \"twoElectron\": dak.num(dakevents.Electron) == 2,\n", + " \"eleOppSign\": dak.sum(dakevents.Electron.charge, axis=1) == 0,\n", + " \"noElectron\": dak.num(dakevents.Electron) == 0,\n", + " \"twoMuon\": dak.num(dakevents.Muon) == 2,\n", + " \"muOppSign\": dak.sum(dakevents.Muon.charge, axis=1) == 0,\n", + " \"noMuon\": dak.num(dakevents.Muon) == 0,\n", + " \"leadPt20\": dak.any(dakevents.Electron.pt >= 20.0, axis=1)\n", + " | dak.any(dakevents.Muon.pt >= 20.0, axis=1),\n", + " }\n", + ")\n", + "\n", + "print(selection)" + ] + }, + { + "cell_type": "markdown", + "id": "49520c74-3bac-4355-b4b8-482ac38b4ae2", + "metadata": {}, + "source": [ + "Now, the same functions will return `dask_awkward` objects that have to be computed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "74c43d65-824b-49e6-a6aa-aa7bef97e7d9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dask.awkward" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "selection.all(\"twoElectron\", \"noMuon\", \"leadPt20\")" + ] + }, + { + "cell_type": "markdown", + "id": "87ce9147-a37d-46ba-802c-f1531cd1cf3d", + "metadata": {}, + "source": [ + "When computing those arrays we should get the same arrays that we got when operating in eager mode." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "be4b8e7a-5ea1-45f4-b1d9-827e90331366", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[False, False, True, False, False, ..., False, False, False, False, False]\n", + "[False, False, False, True, False, ..., False, False, False, False, False]\n" + ] + } + ], + "source": [ + "print(selection.all(\"twoElectron\", \"noMuon\", \"leadPt20\").compute())\n", + "print(selection.require(twoElectron=True, noMuon=True, eleOppSign=False).compute())" + ] + }, + { + "cell_type": "markdown", + "id": "b32ece8e-90af-4808-8248-8dfe63c16a6c", + "metadata": {}, + "source": [ + "Now, N-1 and cutflow will return only delayed objects that must be computed."
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "4e79a6b3-55cc-4248-a1db-a076f4667278", + "metadata": { + "slideshow": { + "slide_type": "-" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "NminusOne(selections=('twoElectron', 'noMuon', 'leadPt20'))" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nminusone = selection.nminusone(\"twoElectron\", \"noMuon\", \"leadPt20\")\n", + "nminusone" + ] + }, + { + "cell_type": "markdown", + "id": "e661f0d1-f792-423c-b698-f031209a721f", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "source": [ + "It is again an `NminusOne` object which has the same methods." + ] + }, + { + "cell_type": "markdown", + "id": "8442cbf2-6c41-4174-a011-a0c451c2feb6", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Let's look at the results of the N-1 selection in the same way" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "dee4c70f-d6fa-4b0a-9d04-f5555ec1856f", + "metadata": { + "slideshow": { + "slide_type": "-" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(['initial', 'N - twoElectron', 'N - noMuon', 'N - leadPt20', 'N'],\n", + " [dask.awkward,\n", + " dask.awkward,\n", + " dask.awkward,\n", + " dask.awkward,\n", + " dask.awkward],\n", + " [dask.awkward,\n", + " dask.awkward,\n", + " dask.awkward,\n", + " dask.awkward])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels, nev, masks = nminusone.result()\n", + "labels, nev, masks" + ] + }, + { + "cell_type": "markdown", + "id": "fbd64ce9-fa7d-4ff6-847a-856dae236565", + "metadata": {}, + "source": [ + "Now however, you can see that everything is a dask awkward object (apart from the labels of course). 
If we compute them we should get the same things as before and indeed we do:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "f8fc01f5-3f07-441d-b7de-44aeeaa042ca", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((40, 10, 3, 5, 3),\n", + " (,\n", + " ,\n", + " ,\n", + " ))" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dask.compute(*nev), dask.compute(*masks)" + ] + }, + { + "cell_type": "markdown", + "id": "1df3b553-70b9-4785-b5f3-f44cd25889a6", + "metadata": {}, + "source": [ + "We can again print the statistics, however for this to happen, the object must of course compute the delayed `nev` list." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "118ee230-eb4f-44da-9dba-05ce15e7951e", + "metadata": { + "slideshow": { + "slide_type": "-" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "N-1 selection stats:\n", + "Ignoring twoElectron : pass = 10 all = 40 -- eff = 25.0 %\n", + "Ignoring noMuon : pass = 3 all = 40 -- eff = 7.5 %\n", + "Ignoring leadPt20 : pass = 5 all = 40 -- eff = 12.5 %\n", + "All cuts : pass = 3 all = 40 -- eff = 7.5 %\n" + ] + } + ], + "source": [ + "nminusone.print()" + ] + }, + { + "cell_type": "markdown", + "id": "e1146ee8-5fe8-4e05-8ebe-658014f613a3", + "metadata": {}, + "source": [ + "And now if we call `result()` again, the `nev` list is materialized." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "700e7669-0459-4e3d-b020-805562106cab", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[40, 10, 3, 5, 3]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nminusone.result().nev" + ] + }, + { + "cell_type": "markdown", + "id": "b77aa859-7ae1-4c54-9fc4-2ab516e3a483", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "Again the histogram of your total event yields works. This time it returns a `hist.dask.Hist` object." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "a2c4fb20-b241-4eeb-8312-ee5a645f30b6", + "metadata": { + "slideshow": { + "slide_type": "-" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "5\n", + "\n", + "\n", + "N-1\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "Integer(0, 5, name='N-1')
\n", + "
\n", + "Double() Σ=0.0\n", + "\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "Hist(Integer(0, 5, name='N-1'), storage=Double()) # (has staged fills)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "h, labels = nminusone.yieldhist()\n", + "h" + ] + }, + { + "cell_type": "markdown", + "id": "bc992bfd-cce3-4004-9e16-275d69044745", + "metadata": {}, + "source": [ + "It appears empty because it hasn't been computed yet. Let's do that." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "38a6d003-fc3c-46e5-ad3b-5d343e997dc5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "5\n", + "\n", + "\n", + "N-1\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "Integer(0, 5, name='N-1')
\n", + "
\n", + "Double() Σ=61.0\n", + "\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "Hist(Integer(0, 5, name='N-1'), storage=Double()) # Sum: 61.0" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "h.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "c36f391a-d2a6-4364-afcc-8da81d72ec90", + "metadata": {}, + "source": [ + "Notice that this doesn't happen in place as `h` is still not computed." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "226cd7fa-ac79-460d-aee5-856eb3362099", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "5\n", + "\n", + "\n", + "N-1\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "Integer(0, 5, name='N-1')
\n", + "
\n", + "Double() Σ=0.0\n", + "\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "Hist(Integer(0, 5, name='N-1'), storage=Double()) # (has staged fills)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "h" + ] + }, + { + "cell_type": "markdown", + "id": "6e496df3-8846-4678-a92a-deaf0f2cafbb", + "metadata": {}, + "source": [ + "We can again plot this histogram but we have to call plot on the computed one, otherwise it will just be empty." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "c96c48b2-53b3-460b-8019-e9f91473aef5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "h.compute().plot1d()\n", + "plt.xticks(plt.gca().get_xticks(), labels, rotation=45)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8ca4e1f0-f5a3-46b3-9bcb-848aa3e04a99", + "metadata": {}, + "source": [ + "And we got exactly the same thing. Saving to `.npz` files is still possible but the delayed arrays will be naturally materalized while saving." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "d0d57fbb-633b-4140-95b6-7ac551a4c271", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "labels: ['initial' 'N - twoElectron' 'N - noMuon' 'N - leadPt20' 'N']\n", + "nev: [40 10 3 5 3]\n", + "masks: [[False True True False False False False False False True False False\n", + " False False False True True False False False True True False False\n", + " False False False True False True False False False True False False\n", + " False False False False]\n", + " [False False True False False False False False False False False False\n", + " False False False False False False False False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]\n", + " [False False True True False False False False False False False False\n", + " False False False False False False True False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]\n", + " [False False True False False False False False False False False False\n", + " False False False False False False False False True True False False\n", + " False False False False False False False False False False False False\n", + " False False False False]]\n" + ] + } + ], + "source": [ + "nminusone.to_npz(\"nminusone_results.npz\")\n", + "\n", + "with np.load(\"nminusone_results.npz\") as 
f:\n", + " for i in f.files:\n", + " print(f\"{i}: {f[i]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "7dd71131-d0df-4930-b853-d126d7c0c2db", + "metadata": {}, + "source": [ + "Same logic applies to the `plot_vars` function. Remember to use `dakevents` now and not `events`." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "621cc56b-7bf1-4be8-b24c-04c89125e6b0", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "hs, labels = nminusone.plot_vars(\n", + " {\"Ept\": dakevents.Electron.pt, \"Ephi\": dakevents.Electron.phi}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8e94ed9c-dfd7-47d3-997a-4c7561f51147", + "metadata": {}, + "source": [ + "Those histograms are also delayed and have to be computed before plotting them." + ] + }, + { + "cell_type": "markdown", + "id": "0877deac-0e27-480d-b471-bafba2441328", + "metadata": {}, + "source": [ + "Exactly the same things apply to the cutflow in delayed mode." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 80c453052..a840f32b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ classifiers = [ ] dependencies = [ "awkward>=2.2.3", - "uproot>=5.0.7", + "uproot>=5.0.9", "dask[array]>=2023.4.0", "dask-awkward>=2023.6.3", "dask-histogram>=2023.6.0", diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 1e85eeed9..66b92fe2b 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -3,9 +3,17 @@ These helper classes were previously part of ``coffea.processor`` but have been migrated and updated to be compatible with awkward-array 1.0 """ +import warnings +from collections import namedtuple + import awkward +import dask.array import dask_awkward +import hist +import hist.dask import numpy +from dask_awkward.lib.core import compatible_partitions +from dask_awkward.utils import IncompatiblePartitions import coffea.processor import coffea.util @@ -409,6 +417,603 @@ def variations(self): return keys +class NminusOneToNpz: + """Object to be returned by NmiusOne.to_npz()""" + + def __init__(self, file, labels, nev, masks, saver): + self._file = file + self._labels = labels + self._nev = nev + self._masks = masks + self._saver = saver + + def __repr__(self): + return f"NminusOneToNpz(file={self._file}), labels={self._labels})" + + @property + def file(self): + return self._file + + @property + def labels(self): + return self._labels + + @property + def nev(self): + return self._nev + + @property + def masks(self): + return self._masks + + 
def compute(self): + self._nev = list(dask.compute(*self._nev)) + self._masks = list(dask.compute(*self._masks)) + self._saver(self._file, labels=self._labels, nev=self._nev, masks=self._masks) + + +class CutflowToNpz: + """Object to be returned by Cutflow.to_npz()""" + + def __init__( + self, file, labels, nevonecut, nevcutflow, masksonecut, maskscutflow, saver + ): + self._file = file + self._labels = labels + self._nevonecut = nevonecut + self._nevcutflow = nevcutflow + self._masksonecut = masksonecut + self._maskscutflow = maskscutflow + self._saver = saver + + def __repr__(self): + return f"CutflowToNpz(file={self._file}), labels={self._labels})" + + @property + def file(self): + return self._file + + @property + def labels(self): + return self._labels + + @property + def nevonecut(self): + return self._nevonecut + + @property + def nevcutflow(self): + return self._nevcutflow + + @property + def masksonecut(self): + return self._masksonecut + + @property + def maskscutflow(self): + return self._maskscutflow + + def compute(self): + self._nevonecut = list(dask.compute(*self._nevonecut)) + self._nevcutflow = list(dask.compute(*self._nevcutflow)) + self._masksonecut = list(dask.compute(*self._masksonecut)) + self._maskscutflow = list(dask.compute(*self._maskscutflow)) + numpy.savez( + self._file, + labels=self._labels, + nevonecut=self._nevonecut, + nevcutflow=self._nevcutflow, + masksonecut=self._masksonecut, + maskscutflow=self._maskscutflow, + ) + + +class NminusOne: + """Object to be returned by PackedSelection.nminusone()""" + + def __init__(self, names, nev, masks, delayed_mode): + self._names = names + self._nev = nev + self._masks = masks + self._delayed_mode = delayed_mode + + def __repr__(self): + return f"NminusOne(selections={self._names})" + + def result(self): + """Returns the results of the N-1 selection as a namedtuple + + Returns + ------- + result : NminusOneResult + A namedtuple with the following attributes: + + nev : list of integers or 
dask_awkward.lib.core.Scalar objects + The number of events in each step of the N-1 selection as a list of integers or delayed integers + masks : list of boolean numpy.ndarray or dask_awkward.lib.core.Array objects + The boolean mask vectors of which events pass the N-1 selection each time as a list of materialized or delayed boolean arrays + + """ + NminusOneResult = namedtuple("NminusOneResult", ["labels", "nev", "masks"]) + labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] + return NminusOneResult(labels, self._nev, self._masks) + + def to_npz(self, file, compressed=False, compute=True): + """Saves the results of the N-1 selection to a .npz file + + Parameters + ---------- + file : str or file + Either the filename (string) or an open file (file-like object) + where the data will be saved. If file is a string or a Path, the + ``.npz`` extension will be appended to the filename if it is not + already there. + compressed : bool, optional + If True, the data will be compressed in the ``.npz`` file. + Default is False. + compute : bool, optional + Whether to immediately start writing or to return an object + that the user can choose when to start writing by calling compute(). + Default is True. + + Returns + ------- + out : NminusOneToNpz or None + If ``compute=True``, returns None. Otherwise, returns an object + that can be used to start writing the data by calling compute(). 
+ """ + labels, nev, masks = self.result() + + if compressed: + saver = numpy.savez_compressed + else: + saver = numpy.savez + + out = NminusOneToNpz(file, labels, nev, masks, saver) + if compute: + out.compute() + return None + else: + return out + + def print(self): + """Prints the statistics of the N-1 selection""" + + if self._delayed_mode: + self._nev = list(dask.compute(*self._nev)) + nev = self._nev + print("N-1 selection stats:") + for i, name in enumerate(self._names): + print( + f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\ + all = {nev[0]:<20}\ + -- eff = {nev[i+1]*100/nev[0]:.1f} %" + ) + + if True: + print( + f"All cuts {'':<20}: pass = {nev[-1]:<20}\ + all = {nev[0]:<20}\ + -- eff = {nev[-1]*100/nev[0]:.1f} %" + ) + + def yieldhist(self): + """Returns the N-1 selection yields as a ``hist.Hist`` object + + Returns + ------- + h : hist.Hist or hist.dask.Hist + Histogram of the number of events surviving the N-1 selection + labels : list of strings + The bin labels of the histogram + """ + labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] + if not self._delayed_mode: + h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1")) + h.fill(numpy.arange(len(labels)), weight=self._nev) + + else: + h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1")) + for i, weight in enumerate(self._masks, 1): + h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight) + h.fill(dask_awkward.zeros_like(weight)) + + return h, labels + + def plot_vars( + self, + vars, + axes=None, + bins=None, + start=None, + stop=None, + edges=None, + transform=None, + ): + """Plot the histograms of variables for each step of the N-1 selection + + Parameters + ---------- + vars : dict + A dictionary in the form ``{name: array}`` where ``name`` is the name of the variable, + and ``array`` is the corresponding array of values. + The arrays must be the same length as each mask of the N-1 selection. 
+ axes : list of hist.axis objects, optional + The axes objects to histogram the variables on. This will override all the following arguments that define axes. + Must be the same length as ``vars``. + bins : iterable of integers or Nones, optional + The number of bins for each variable histogram. If not specified, it defaults to 20. + Must be the same length as ``vars``. + start : iterable of floats or integers or Nones, optional + The lower edge of the first bin for each variable histogram. If not specified, it defaults to the minimum value of the variable array. + Must be the same length as ``vars``. + stop : iterable of floats or integers or Nones, optional + The upper edge of the last bin for each variable histogram. If not specified, it defaults to the maximum value of the variable array. + Must be the same length as ``vars``. + edges : list of iterables of floats or integers, optional + The bin edges for each variable histogram. This overrides ``bins``, ``start``, and ``stop`` if specified. + Must be the same length as ``vars``. + transform : iterable of hist.axis.transform objects or Nones, optional + The transforms to apply to each variable histogram axis. If not specified, it defaults to None. + Must be the same length as ``vars``. + + Returns + ------- + hists : list of hist.Hist or hist.dask.Hist objects + A list of 2D histograms of the variables for each step of the N-1 selection. + The first axis is the variable, the second axis is the N-1 selection step. + labels : list of strings + The bin labels of y axis of the histogram. 
+ """ + if self._delayed_mode: + for name, var in vars.items(): + if not compatible_partitions(var, self._masks[0]): + raise IncompatiblePartitions("plot_vars", var, self._masks[0]) + else: + for name, var in vars.items(): + if len(var) != len(self._masks[0]): + raise ValueError( + f"The variable '{name}' has length '{len(var)}', but the masks have length '{len(self._masks[0])}'" + ) + + hists = [] + labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] + + bins = [None] * len(vars) if bins is None else bins + start = [None] * len(vars) if start is None else start + stop = [None] * len(vars) if stop is None else stop + edges = [None] * len(vars) if edges is None else edges + transform = [None] * len(vars) if transform is None else transform + + if axes is not None: + axes = axes + else: + axes = [] + for (name, var), b, s1, s2, e, t in zip( + vars.items(), bins, start, stop, edges, transform + ): + ax = coffea.util._gethistogramaxis( + name, var, b, s1, s2, e, t, self._delayed_mode + ) + axes.append(ax) + + checklengths = [ + len(x) == len(vars) for x in (axes, bins, start, stop, edges, transform) + ] + if not all(checklengths): + raise ValueError( + "vars, axes, bins, start, stop, edges, and transform must be the same length" + ) + + if not self._delayed_mode: + for (name, var), axis in zip(vars.items(), axes): + h = hist.Hist( + axis, + hist.axis.Integer(0, len(labels), name="N-1"), + ) + arr = awkward.flatten(var) + h.fill(arr, awkward.zeros_like(arr)) + for i, mask in enumerate(self.result().masks, 1): + arr = awkward.flatten(var[mask]) + h.fill(arr, awkward.full_like(arr, i, dtype=int)) + hists.append(h) + + else: + for (name, var), axis in zip(vars.items(), axes): + h = hist.dask.Hist( + axis, + hist.axis.Integer(0, len(labels), name="N-1"), + ) + arr = dask_awkward.flatten(var) + h.fill(arr, dask_awkward.zeros_like(arr)) + for i, mask in enumerate(self.result().masks, 1): + arr = dask_awkward.flatten(var[mask]) + h.fill(arr, 
dask_awkward.full_like(arr, i, dtype=int)) + hists.append(h) + + return hists, labels + + +class Cutflow: + """Object to be returned by PackedSelection.cutflow()""" + + def __init__( + self, names, nevonecut, nevcutflow, masksonecut, maskscutflow, delayed_mode + ): + self._names = names + self._nevonecut = nevonecut + self._nevcutflow = nevcutflow + self._masksonecut = masksonecut + self._maskscutflow = maskscutflow + self._delayed_mode = delayed_mode + + def __repr__(self): + return f"Cutflow(selections={self._names})" + + def result(self): + """Returns the results of the cutflow as a namedtuple + + Returns + ------- + result : CutflowResult + A namedtuple with the following attributes: + + nevonecut : list of integers or dask_awkward.lib.core.Scalar objects + The number of events that survive each cut alone as a list of integers or delayed integers + nevcutflow : list of integers or dask_awkward.lib.core.Scalar objects + The number of events that survive the cumulative cutflow as a list of integers or delayed integers + masksonecut : list of boolean numpy.ndarray or dask_awkward.lib.core.Array objects + The boolean mask vectors of which events pass each cut alone as a list of materialized or delayed boolean arrays + maskscutflow : list of boolean numpy.ndarray or dask_awkward.lib.core.Array objects + The boolean mask vectors of which events pass the cumulative cutflow a list of materialized or delayed boolean arrays + """ + CutflowResult = namedtuple( + "CutflowResult", + ["labels", "nevonecut", "nevcutflow", "masksonecut", "maskscutflow"], + ) + labels = ["initial"] + list(self._names) + return CutflowResult( + labels, + self._nevonecut, + self._nevcutflow, + self._masksonecut, + self._maskscutflow, + ) + + def to_npz(self, file, compressed=False, compute=True): + """Saves the results of the cutflow to a .npz file + + Parameters + ---------- + file : str or file + Either the filename (string) or an open file (file-like object) + where the data will be saved. 
If file is a string or a Path, the + ``.npz`` extension will be appended to the filename if it is not + already there. + compressed : bool, optional + If True, the data will be compressed in the ``.npz`` file. + Default is False. + compute : bool, optional + Whether to immediately start writing or to return an object + that the user can choose when to start writing by calling compute(). + Default is True. + + Returns + ------- + out : CutflowToNpz or None + If ``compute=True``, returns None. Otherwise, returns an object + that can be used to start writing the data by calling compute(). + """ + labels, nevonecut, nevcutflow, masksonecut, maskscutflow = self.result() + + if compressed: + saver = numpy.savez_compressed + else: + saver = numpy.savez + + out = CutflowToNpz( + file, labels, nevonecut, nevcutflow, masksonecut, maskscutflow, saver + ) + if compute: + out.compute() + return None + else: + return out + + def print(self): + """Prints the statistics of the Cutflow""" + + if self._delayed_mode: + self._nevonecut = list(dask.compute(*self._nevonecut)) + self._nevcutflow = list(dask.compute(*self._nevcutflow)) + nevonecut = self._nevonecut + nevcutflow = self._nevcutflow + print("Cutflow stats:") + for i, name in enumerate(self._names): + print( + f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\ + cumulative pass = {nevcutflow[i+1]:<20}\ + all = {nevonecut[0]:<20}\ + -- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\ + -- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %" + ) + + def yieldhist(self): + """Returns the cutflow yields as ``hist.Hist`` objects + + Returns + ------- + honecut : hist.Hist or hist.dask.Hist + Histogram of the number of events surviving each cut alone + hcutflow : hist.Hist or hist.dask.Hist + Histogram of the number of events surviving the cumulative cutflow + labels : list of strings + The bin labels of the histograms + """ + labels = ["initial"] + list(self._names) + + if not self._delayed_mode: + honecut = 
hist.Hist(hist.axis.Integer(0, len(labels), name="onecut")) + hcutflow = honecut.copy() + hcutflow.axes.name = ("cutflow",) + honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut) + hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow) + + else: + honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut")) + hcutflow = honecut.copy() + hcutflow.axes.name = ("cutflow",) + + for i, weight in enumerate(self._masksonecut, 1): + honecut.fill( + dask_awkward.full_like(weight, i, dtype=int), weight=weight + ) + honecut.fill(dask_awkward.zeros_like(weight)) + for i, weight in enumerate(self._maskscutflow, 1): + hcutflow.fill( + dask_awkward.full_like(weight, i, dtype=int), weight=weight + ) + hcutflow.fill(dask_awkward.zeros_like(weight)) + + return honecut, hcutflow, labels + + def plot_vars( + self, + vars, + axes=None, + bins=None, + start=None, + stop=None, + edges=None, + transform=None, + ): + """Plot the histograms of variables for each step of the cutflow + + Parameters + ---------- + vars : dict + A dictionary in the form ``{name: array}`` where ``name`` is the name of the variable, + and ``array`` is the corresponding array of values. + The arrays must be the same length as each mask of the cutflow. + axes : list of hist.axis objects, optional + The axes objects to histogram the variables on. This will override all the following arguments that define axes. + Must be the same length as ``vars``. + bins : iterable of integers or Nones, optional + The number of bins for each variable histogram. If not specified, it defaults to 20. + Must be the same length as ``vars``. + start : iterable of floats or integers or Nones, optional + The lower edge of the first bin for each variable histogram. If not specified, it defaults to the minimum value of the variable array. + Must be the same length as ``vars``. + stop : iterable of floats or integers or Nones, optional + The upper edge of the last bin for each variable histogram. 
If not specified, it defaults to the maximum value of the variable array. + Must be the same length as ``vars``. + edges : list of iterables of floats or integers, optional + The bin edges for each variable histogram. This overrides ``bins``, ``start``, and ``stop`` if specified. + Must be the same length as ``vars``. + transform : iterable of hist.axis.transform objects or Nones, optional + The transforms to apply to each variable histogram axis. If not specified, it defaults to None. + Must be the same length as ``vars``. + + Returns + ------- + histsonecut : list of hist.Hist or hist.dask.Hist objects + A list of 2D histograms of the variables of events surviving each cut alone. + The first axis is the variable, the second axis is the cuts. + histscutflow : list of hist.Hist or hist.dask.Hist objects + A list of 2D histograms of the variables of events surviving the cumulative cutflow. + The first axis is the variable, the second axis is the cuts. + labels : list of strings + The bin labels of the y axis of the histograms. 
+ """ + if self._delayed_mode: + for name, var in vars.items(): + if not compatible_partitions(var, self._masksonecut[0]): + raise IncompatiblePartitions("plot_vars", var, self._masksonecut[0]) + else: + for name, var in vars.items(): + if len(var) != len(self._masksonecut[0]): + raise ValueError( + f"The variable '{name}' has length '{len(var)}', but the masks have length '{len(self._masksonecut[0])}'" + ) + + histsonecut, histscutflow = [], [] + labels = ["initial"] + list(self._names) + + bins = [None] * len(vars) if bins is None else bins + start = [None] * len(vars) if start is None else start + stop = [None] * len(vars) if stop is None else stop + edges = [None] * len(vars) if edges is None else edges + transform = [None] * len(vars) if transform is None else transform + + if axes is not None: + axes = axes + else: + axes = [] + for (name, var), b, s1, s2, e, t in zip( + vars.items(), bins, start, stop, edges, transform + ): + ax = coffea.util._gethistogramaxis( + name, var, b, s1, s2, e, t, self._delayed_mode + ) + axes.append(ax) + + checklengths = [ + len(x) == len(vars) for x in (axes, bins, start, stop, edges, transform) + ] + if not all(checklengths): + raise ValueError( + "vars, axes, bins, start, stop, edges, and transform must be the same length" + ) + + if not self._delayed_mode: + for (name, var), axis in zip(vars.items(), axes): + honecut = hist.Hist( + axis, + hist.axis.Integer(0, len(labels), name="onecut"), + ) + hcutflow = honecut.copy() + hcutflow.axes.name = name, "cutflow" + + arr = awkward.flatten(var) + honecut.fill(arr, awkward.zeros_like(arr)) + hcutflow.fill(arr, awkward.zeros_like(arr)) + + for i, mask in enumerate(self.result().masksonecut, 1): + arr = awkward.flatten(var[mask]) + honecut.fill(arr, awkward.full_like(arr, i, dtype=int)) + histsonecut.append(honecut) + + for i, mask in enumerate(self.result().maskscutflow, 1): + arr = awkward.flatten(var[mask]) + hcutflow.fill(arr, awkward.full_like(arr, i, dtype=int)) + 
histscutflow.append(hcutflow) + + else: + for (name, var), axis in zip(vars.items(), axes): + honecut = hist.dask.Hist( + axis, + hist.axis.Integer(0, len(labels), name="onecut"), + ) + hcutflow = honecut.copy() + hcutflow.axes.name = name, "cutflow" + + arr = dask_awkward.flatten(var) + honecut.fill(arr, dask_awkward.zeros_like(arr)) + hcutflow.fill(arr, dask_awkward.zeros_like(arr)) + + for i, mask in enumerate(self.result().masksonecut, 1): + arr = dask_awkward.flatten(var[mask]) + honecut.fill(arr, dask_awkward.full_like(arr, i, dtype=int)) + histsonecut.append(honecut) + + for i, mask in enumerate(self.result().maskscutflow, 1): + arr = dask_awkward.flatten(var[mask]) + hcutflow.fill(arr, dask_awkward.full_like(arr, i, dtype=int)) + histscutflow.append(hcutflow) + + return histsonecut, histscutflow, labels + + class PackedSelection: """Store several boolean arrays in a compact manner @@ -439,11 +1044,27 @@ def __init__(self, dtype="uint32"): self._names = [] self._data = None + def __repr__(self): + delayed_mode = None if self._data is None else self.delayed_mode + return f"PackedSelection(selections={tuple(self._names)}, delayed_mode={delayed_mode}, items={len(self._names)}, maxitems={self.maxitems})" + @property def names(self): """Current list of mask names available""" return self._names + @property + def delayed_mode(self): + if isinstance(self._data, dask_awkward.Array): + return True + elif isinstance(self._data, numpy.ndarray): + return False + else: + warnings.warn( + "PackedSelection hasn't been initialized with a boolean array yet!" 
+ ) + return False + @property def maxitems(self): return PackedSelection._supported_types[self._dtype] @@ -456,18 +1077,16 @@ def __add_delayed(self, name, selection, fill_value): selection = dask_awkward.fill_none(selection, fill_value) sel_type = dask_awkward.type(selection) if sel_type.primitive != "bool": - raise ValueError(f"Expected a boolean array, received {selection.dtype}") + raise ValueError(f"Expected a boolean array, received {sel_type.primitive}") if len(self._names) == 0: self._data = dask_awkward.zeros_like(selection, dtype=self._dtype) - if isinstance(selection, dask_awkward.Array) and not isinstance( - self._data, dask_awkward.Array - ): + if isinstance(selection, dask_awkward.Array) and not self.delayed_mode: raise ValueError( f"New selection '{name}' is not eager while PackedSelection is!" ) elif len(self._names) == self.maxitems: raise RuntimeError( - f"Exhausted all slots in {self}, consider a larger dtype or fewer selections" + f"Exhausted all slots in PackedSelection: {self}, consider a larger dtype or fewer selections" ) elif not dask_awkward.lib.core.compatible_partitions(self._data, selection): raise ValueError( @@ -488,15 +1107,13 @@ def __add_eager(self, name, selection, fill_value): raise ValueError(f"Expected a boolean array, received {selection.dtype}") if len(self._names) == 0: self._data = numpy.zeros(len(selection), dtype=self._dtype) - if isinstance(selection, numpy.ndarray) and not isinstance( - self._data, numpy.ndarray - ): + if isinstance(selection, numpy.ndarray) and self.delayed_mode: raise ValueError( - f"New selection '{name}' is not eager while PackedSelection is!" + f"New selection '{name}' is not delayed while PackedSelection is!" 
) elif len(self._names) == self.maxitems: raise RuntimeError( - f"Exhausted all slots in {self}, consider a larger dtype or fewer selections" + f"Exhausted all slots in PackedSelection: {self}, consider a larger dtype or fewer selections" ) elif self._data.shape != selection.shape: raise ValueError( @@ -525,12 +1142,29 @@ def add(self, name, selection, fill_value=False): fill_value : bool, optional All masked entries will be filled as specified (default: ``False``) """ + if isinstance(selection, dask.array.Array): + raise ValueError( + "Dask arrays are not supported, please convert them to dask_awkward.Array by using dask_awkward.from_dask_array()" + ) selection = coffea.util._ensure_flat(selection, allow_missing=True) if isinstance(selection, numpy.ndarray): self.__add_eager(name, selection, fill_value) elif isinstance(selection, dask_awkward.Array): self.__add_delayed(name, selection, fill_value) + def add_multiple(self, selections, fill_value=False): + """Add multiple boolean arrays at once, see ``add`` for details + + Parameters + ---------- + selections : dict + a dictionary of selections, in the form ``{name: selection}`` + fill_value : bool, optional + All masked entries will be filled as specified (default: ``False``) + """ + for name, selection in selections.items(): + self.add(name, selection, fill_value) + def require(self, **names): """Return a mask vector corresponding to specific requirements @@ -557,6 +1191,12 @@ def require(self, **names): returns a boolean array where an entry is True if the corresponding entries ``cut1 == True``, ``cut2 == False``, and ``cut3`` arbitrary. 
""" + for cut, v in names.items(): + if not isinstance(cut, str) or cut not in self._names: + raise ValueError( + "All arguments must be strings that refer to the names of existing selections" + ) + consider = 0 require = 0 for name, val in names.items(): @@ -564,11 +1204,23 @@ def require(self, **names): idx = self._names.index(name) consider |= 1 << idx require |= int(val) << idx - return (self._data & consider) == require + return (self._data & self._dtype.type(consider)) == require def all(self, *names): - """Shorthand for `require`, where all the values are True""" - return self.require(**{name: True for name in names}) + """Shorthand for `require`, where all the values are True. + If no arguments are given, all the added selections are required to be True. + """ + if names: + return self.require(**{name: True for name in names}) + return self.require(**{name: True for name in self._names}) + + def allfalse(self, *names): + """Shorthand for `require`, where all the values are False. + If no arguments are given, all the added selections are required to be False. + """ + if names: + return self.require(**{name: False for name in names}) + return self.require(**{name: False for name in self._names}) def any(self, *names): """Return a mask vector corresponding to an inclusive OR of requirements @@ -593,8 +1245,108 @@ def any(self, *names): returns a boolean array where an entry is True if the corresponding entries ``cut1 == True`` or ``cut2 == False``, and ``cut3`` arbitrary. 
""" + for cut in names: + if not isinstance(cut, str) or cut not in self._names: + raise ValueError( + "All arguments must be strings that refer to the names of existing selections" + ) consider = 0 for name in names: idx = self._names.index(name) consider |= 1 << idx - return (self._data & consider) != 0 + return (self._data & self._dtype.type(consider)) != 0 + + def nminusone(self, *names): + """Compute the "N-1" style selection for a set of selections + + The N-1 style selection for a set of selections, returns an object which can return a list of the number of events + that pass all the other selections ignoring one at a time. The first element of the returned list + is the total number of events before any selections are applied. + The last element is the final number of events that pass if all selections are applied. + It also returns a list of boolean mask vectors of which events pass the N-1 selection each time. + Can also return a histogram as a ``hist.Hist`` object where the bin heights are the number of events of the N-1 selection list. + If the PackedSelection is in delayed mode, the elements of those lists will be dask_awkward Arrays that can be computed whenever the user wants. + If the histogram is requested, the delayed arrays of the number of events list will be computed in the process in order to set the bin heights. 
+ + Parameters + ---------- + ``*names`` : args + The named selections to use, need to be a subset of the selections already added + + Returns + ------- + res: coffea.analysis_tools.NminusOne + A wrapper class for the results, see the documentation for that class for more details + """ + for cut in names: + if not isinstance(cut, str) or cut not in self._names: + raise ValueError( + "All arguments must be strings that refer to the names of existing selections" + ) + + masks = [] + for i, cut in enumerate(names): + mask = self.all(*(names[:i] + names[i + 1 :])) + masks.append(mask) + mask = self.all(*names) + masks.append(mask) + + if not self.delayed_mode: + nev = [len(self._data)] + nev.extend(numpy.sum(masks, axis=1)) + + else: + nev = [dask_awkward.count(self._data, axis=0)] + nev.extend([dask_awkward.sum(mask) for mask in masks]) + + return NminusOne(names, nev, masks, self.delayed_mode) + + def cutflow(self, *names): + """Compute the cutflow for a set of selections + + Returns an object which can return a list of the number of events that pass all the previous selections including the current one + after each named selection is applied consecutively. The first element + of the returned list is the total number of events before any selections are applied. + The last element is the final number of events that pass after all the selections are applied. + Can also return a cutflow histogram as a ``hist.Hist`` object where the bin heights are the number of events of the cutflow list. + If the PackedSelection is in delayed mode, the elements of the list will be dask_awkward Arrays that can be computed whenever the user wants. + If the histogram is requested, those delayed arrays will be computed in the process in order to set the bin heights. 
+ + Parameters + ---------- + ``*names`` : args + The named selections to use, need to be a subset of the selections already added + + Returns + ------- + res: coffea.analysis_tools.Cutflow + A wrapper class for the results, see the documentation for that class for more details + """ + for cut in names: + if not isinstance(cut, str) or cut not in self._names: + raise ValueError( + "All arguments must be strings that refer to the names of existing selections" + ) + + masksonecut, maskscutflow = [], [] + for i, cut in enumerate(names): + mask1 = self.any(cut) + mask2 = self.all(*(names[: i + 1])) + masksonecut.append(mask1) + maskscutflow.append(mask2) + + if not self.delayed_mode: + nevonecut = [len(self._data)] + nevcutflow = [len(self._data)] + nevonecut.extend(numpy.sum(masksonecut, axis=1)) + nevcutflow.extend(numpy.sum(maskscutflow, axis=1)) + + else: + nevonecut = [dask_awkward.count(self._data, axis=0)] + nevcutflow = [dask_awkward.count(self._data, axis=0)] + nevonecut.extend([dask_awkward.sum(mask1) for mask1 in masksonecut]) + nevcutflow.extend([dask_awkward.sum(mask2) for mask2 in maskscutflow]) + + return Cutflow( + names, nevonecut, nevcutflow, masksonecut, maskscutflow, self.delayed_mode + ) diff --git a/src/coffea/util.py b/src/coffea/util.py index 0c68e5d2b..dc591da26 100644 --- a/src/coffea/util.py +++ b/src/coffea/util.py @@ -6,6 +6,7 @@ import awkward import dask_awkward +import hist import numba import numpy from rich.progress import ( @@ -98,6 +99,25 @@ def _ensure_flat(array, allow_missing=False): return array +def _gethistogramaxis(name, var, bins, start, stop, edges, transform, delayed_mode): + "Get a hist axis for plot_vars in PackedSelection" + + if edges is not None: + return hist.axis.Variable(edges=edges, name=name) + + if not delayed_mode: + start = ak.min(var) - 1e-6 if start is None else start + stop = ak.max(var) + 1e-6 if stop is None else stop + elif delayed_mode: + start = dak.min(var).compute() - 1e-6 if start is None else start 
+ stop = dak.max(var).compute() + 1e-6 if stop is None else stop + bins = 20 if bins is None else bins + + return hist.axis.Regular( + bins=bins, start=start, stop=stop, name=name, transform=transform + ) + + def _exception_chain(exc: BaseException) -> List[BaseException]: """Retrieves the entire exception chain as a list.""" ret = [] diff --git a/tests/test_analysis_tools.py b/tests/test_analysis_tools.py index 65d5c02cd..1e8c46ec1 100644 --- a/tests/test_analysis_tools.py +++ b/tests/test_analysis_tools.py @@ -1,7 +1,26 @@ +import os + import numpy as np import pytest +import uproot from dummy_distributions import dummy_jagged_eta_pt +from coffea.nanoevents import NanoAODSchema, NanoEventsFactory + +fname = "tests/samples/nano_dy.root" +eagerevents = NanoEventsFactory.from_root( + {os.path.abspath(fname): "Events"}, + schemaclass=NanoAODSchema.v6, + metadata={"dataset": "DYJets"}, +).events() +dakevents = NanoEventsFactory.from_root( + {os.path.abspath(fname): "Events"}, + schemaclass=NanoAODSchema, + metadata={"dataset": "DYJets"}, + permit_dask=True, +).events() +uprootevents = uproot.dask({fname: "Events"}) + def test_weights(): from coffea.analysis_tools import Weights @@ -58,58 +77,61 @@ def test_weights(): raise weight.partial_weight(exclude="test") -def test_weights_dak(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_weights_dak(optimization_enabled): + import dask import dask.array as da import dask_awkward as dak from coffea.analysis_tools import Weights - counts, test_eta, test_pt = dummy_jagged_eta_pt() - scale_central = dak.from_dask_array( - da.random.normal(loc=1.0, scale=0.01, size=counts.size) - ) - scale_up = scale_central * 1.10 - scale_down = scale_central * 0.95 - scale_up_shift = 0.10 * scale_central - scale_down_shift = 0.05 * scale_central + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() + scale_central = dak.from_dask_array( + 
da.random.normal(loc=1.0, scale=0.01, size=counts.size) + ) + scale_up = scale_central * 1.10 + scale_down = scale_central * 0.95 + scale_up_shift = 0.10 * scale_central + scale_down_shift = 0.05 * scale_central + + weight = Weights(None) + weight.add("test", scale_central, weightUp=scale_up, weightDown=scale_down) + weight.add( + "testShift", + scale_central, + weightUp=scale_up_shift, + weightDown=scale_down_shift, + shift=True, + ) - weight = Weights(None) - weight.add("test", scale_central, weightUp=scale_up, weightDown=scale_down) - weight.add( - "testShift", - scale_central, - weightUp=scale_up_shift, - weightDown=scale_down_shift, - shift=True, - ) + var_names = weight.variations + expected_names = ["testShiftUp", "testShiftDown", "testUp", "testDown"] + for name in expected_names: + assert name in var_names - var_names = weight.variations - expected_names = ["testShiftUp", "testShiftDown", "testUp", "testDown"] - for name in expected_names: - assert name in var_names + test_central = weight.weight() + exp_weight = scale_central * scale_central - test_central = weight.weight() - exp_weight = scale_central * scale_central + assert np.all(np.abs(test_central - (exp_weight)).compute() < 1e-6) - assert np.all(np.abs(test_central - (exp_weight)).compute() < 1e-6) + test_up = weight.weight("testUp") + exp_up = scale_central * scale_central * 1.10 - test_up = weight.weight("testUp") - exp_up = scale_central * scale_central * 1.10 + assert np.all(np.abs(test_up - (exp_up)).compute() < 1e-6) - assert np.all(np.abs(test_up - (exp_up)).compute() < 1e-6) + test_down = weight.weight("testDown") + exp_down = scale_central * scale_central * 0.95 - test_down = weight.weight("testDown") - exp_down = scale_central * scale_central * 0.95 + assert np.all(np.abs(test_down - (exp_down)).compute() < 1e-6) - assert np.all(np.abs(test_down - (exp_down)).compute() < 1e-6) + test_shift_up = weight.weight("testUp") - test_shift_up = weight.weight("testUp") + assert 
np.all(np.abs(test_shift_up - (exp_up)).compute() < 1e-6) - assert np.all(np.abs(test_shift_up - (exp_up)).compute() < 1e-6) + test_shift_down = weight.weight("testDown") - test_shift_down = weight.weight("testDown") - - assert np.all(np.abs(test_shift_down - (exp_down)).compute() < 1e-6) + assert np.all(np.abs(test_shift_down - (exp_down)).compute() < 1e-6) def test_weights_multivariation(): @@ -162,59 +184,62 @@ def test_weights_multivariation(): assert np.all(np.abs(test_down_2 - (exp_down)) < 1e-6) -def test_weights_multivariation_dak(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_weights_multivariation_dak(optimization_enabled): + import dask import dask.array as da import dask_awkward as dak from coffea.analysis_tools import Weights - counts, test_eta, test_pt = dummy_jagged_eta_pt() - scale_central = dak.from_dask_array( - da.random.normal(loc=1.0, scale=0.01, size=counts.size) - ) - scale_up = scale_central * 1.10 - scale_down = scale_central * 0.95 - scale_up_2 = scale_central * 1.2 - scale_down_2 = scale_central * 0.90 + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() + scale_central = dak.from_dask_array( + da.random.normal(loc=1.0, scale=0.01, size=counts.size) + ) + scale_up = scale_central * 1.10 + scale_down = scale_central * 0.95 + scale_up_2 = scale_central * 1.2 + scale_down_2 = scale_central * 0.90 + + weight = Weights(None) + weight.add_multivariation( + "test", + scale_central, + modifierNames=["A", "B"], + weightsUp=[scale_up, scale_up_2], + weightsDown=[scale_down, scale_down_2], + ) - weight = Weights(None) - weight.add_multivariation( - "test", - scale_central, - modifierNames=["A", "B"], - weightsUp=[scale_up, scale_up_2], - weightsDown=[scale_down, scale_down_2], - ) + var_names = weight.variations + expected_names = ["test_AUp", "test_ADown", "test_BUp", "test_BDown"] + for name in expected_names: + assert name in var_names - 
var_names = weight.variations - expected_names = ["test_AUp", "test_ADown", "test_BUp", "test_BDown"] - for name in expected_names: - assert name in var_names + test_central = weight.weight() + exp_weight = scale_central - test_central = weight.weight() - exp_weight = scale_central + assert np.all(np.abs(test_central - (exp_weight)).compute() < 1e-6) - assert np.all(np.abs(test_central - (exp_weight)).compute() < 1e-6) + test_up = weight.weight("test_AUp") + exp_up = scale_central * 1.10 - test_up = weight.weight("test_AUp") - exp_up = scale_central * 1.10 + assert np.all(np.abs(test_up - (exp_up)).compute() < 1e-6) - assert np.all(np.abs(test_up - (exp_up)).compute() < 1e-6) + test_down = weight.weight("test_ADown") + exp_down = scale_central * 0.95 - test_down = weight.weight("test_ADown") - exp_down = scale_central * 0.95 + assert np.all(np.abs(test_down - (exp_down)).compute() < 1e-6) - assert np.all(np.abs(test_down - (exp_down)).compute() < 1e-6) + test_up_2 = weight.weight("test_BUp") + exp_up = scale_central * 1.2 - test_up_2 = weight.weight("test_BUp") - exp_up = scale_central * 1.2 + assert np.all(np.abs(test_up_2 - (exp_up)).compute() < 1e-6) - assert np.all(np.abs(test_up_2 - (exp_up)).compute() < 1e-6) + test_down_2 = weight.weight("test_BDown") + exp_down = scale_central * 0.90 - test_down_2 = weight.weight("test_BDown") - exp_down = scale_central * 0.90 - - assert np.all(np.abs(test_down_2 - (exp_down)).compute() < 1e-6) + assert np.all(np.abs(test_down_2 - (exp_down)).compute() < 1e-6) def test_weights_partial(): @@ -275,71 +300,83 @@ def test_weights_partial(): assert error_raised -def test_weights_partial_dak(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_weights_partial_dak(optimization_enabled): + import dask import dask.array as da import dask_awkward as dak from coffea.analysis_tools import Weights - counts, _, _ = dummy_jagged_eta_pt() - w1 = dak.from_dask_array(da.random.normal(loc=1.0, scale=0.01, 
size=counts.size)) - w2 = dak.from_dask_array(da.random.normal(loc=1.3, scale=0.05, size=counts.size)) + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, _, _ = dummy_jagged_eta_pt() + w1 = dak.from_dask_array( + da.random.normal(loc=1.0, scale=0.01, size=counts.size) + ) + w2 = dak.from_dask_array( + da.random.normal(loc=1.3, scale=0.05, size=counts.size) + ) - weights = Weights(None, storeIndividual=True) - weights.add("w1", w1) - weights.add("w2", w2) + weights = Weights(None, storeIndividual=True) + weights.add("w1", w1) + weights.add("w2", w2) - test_exclude_none = weights.weight() - assert np.all(np.abs(test_exclude_none - w1 * w2).compute() < 1e-6) + test_exclude_none = weights.weight() + assert np.all(np.abs(test_exclude_none - w1 * w2).compute() < 1e-6) - test_exclude1 = weights.partial_weight(exclude=["w1"]) - assert np.all(np.abs(test_exclude1 - w2).compute() < 1e-6) + test_exclude1 = weights.partial_weight(exclude=["w1"]) + assert np.all(np.abs(test_exclude1 - w2).compute() < 1e-6) - test_include1 = weights.partial_weight(include=["w1"]) - assert np.all(np.abs(test_include1 - w1).compute() < 1e-6) + test_include1 = weights.partial_weight(include=["w1"]) + assert np.all(np.abs(test_include1 - w1).compute() < 1e-6) - test_exclude2 = weights.partial_weight(exclude=["w2"]) - assert np.all(np.abs(test_exclude2 - w1).compute() < 1e-6) + test_exclude2 = weights.partial_weight(exclude=["w2"]) + assert np.all(np.abs(test_exclude2 - w1).compute() < 1e-6) - test_include2 = weights.partial_weight(include=["w2"]) - assert np.all(np.abs(test_include2 - w2).compute() < 1e-6) + test_include2 = weights.partial_weight(include=["w2"]) + assert np.all(np.abs(test_include2 - w2).compute() < 1e-6) - test_include_both = weights.partial_weight(include=["w1", "w2"]) - assert np.all(np.abs(test_include_both - w1 * w2).compute() < 1e-6) + test_include_both = weights.partial_weight(include=["w1", "w2"]) + assert 
np.all(np.abs(test_include_both - w1 * w2).compute() < 1e-6) - # Check that exception is thrown if arguments are incompatible - error_raised = False - try: - weights.partial_weight(exclude=["w1"], include=["w2"]) - except ValueError: - error_raised = True - assert error_raised + # Check that exception is thrown if arguments are incompatible + error_raised = False + try: + weights.partial_weight(exclude=["w1"], include=["w2"]) + except ValueError: + error_raised = True + assert error_raised - error_raised = False - try: - weights.partial_weight() - except ValueError: - error_raised = True - assert error_raised + error_raised = False + try: + weights.partial_weight() + except ValueError: + error_raised = True + assert error_raised - # Check that exception is thrown if individual weights - # are not saved from the start - weights = Weights(None, storeIndividual=False) - weights.add("w1", w1) - weights.add("w2", w2) + # Check that exception is thrown if individual weights + # are not saved from the start + weights = Weights(None, storeIndividual=False) + weights.add("w1", w1) + weights.add("w2", w2) - error_raised = False - try: - weights.partial_weight(exclude=["test"], include=["test"]) - except ValueError: - error_raised = True - assert error_raised + error_raised = False + try: + weights.partial_weight(exclude=["test"], include=["test"]) + except ValueError: + error_raised = True + assert error_raised -def test_packed_selection(): +@pytest.mark.parametrize("dtype", ["uint16", "uint32", "uint64"]) +def test_packed_selection_basic(dtype): + import awkward as ak + import dask.array as da + import dask_awkward as dak + from coffea.analysis_tools import PackedSelection - sel = PackedSelection() + sel = PackedSelection(dtype=dtype) shape = (10,) all_true = np.full(shape=shape, fill_value=True, dtype=bool) @@ -347,13 +384,39 @@ def test_packed_selection(): fizz = np.arange(shape[0]) % 3 == 0 buzz = np.arange(shape[0]) % 5 == 0 ones = np.ones(shape=shape, dtype=np.uint64) 
- wrong_shape = ones = np.ones(shape=(shape[0] - 5,), dtype=bool) + wrong_shape = np.ones(shape=(shape[0] - 5,), dtype=bool) + wrong_type = dak.from_awkward(ak.Array(np.arange(shape[0]) % 3 == 0), 1) + daskarray = da.arange(shape[0]) % 3 == 0 + + with pytest.warns( + UserWarning, + match="PackedSelection hasn't been initialized with a boolean array yet!", + ): + assert sel.delayed_mode is False - sel.add("all_true", all_true) - sel.add("all_false", all_false) sel.add("fizz", fizz) sel.add("buzz", buzz) + assert np.all( + sel.all() + == np.array( + [True, False, False, False, False, False, False, False, False, False] + ) + ) + assert np.all( + sel.allfalse() + == np.array([False, True, True, False, True, False, False, True, True, False]) + ) + + sel.add_multiple({"all_true": all_true, "all_false": all_false}) + + assert sel.delayed_mode is False + with pytest.raises( + ValueError, + match="New selection 'wrong_type' is not eager while PackedSelection is!", + ): + sel.add("wrong_type", wrong_type) + assert np.all(sel.require(all_true=True, all_false=False) == all_true) # allow truthy values assert np.all(sel.require(all_true=1, all_false=0) == all_true) @@ -365,75 +428,873 @@ def test_packed_selection(): [True, False, False, False, False, False, False, False, False, False] ) ) + assert np.all( + sel.allfalse("fizz", "buzz") + == np.array([False, True, True, False, True, False, False, True, True, False]) + ) assert np.all( sel.any("fizz", "buzz") == np.array([True, False, False, True, False, True, True, False, False, True]) ) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match=r"New selection 'wrong_shape' has a different shape than existing selections \(\(5,\) vs. 
\(10,\)\)", + ): sel.add("wrong_shape", wrong_shape) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Expected a boolean array, received uint64"): sel.add("ones", ones) with pytest.raises(RuntimeError): - overpack = PackedSelection() + overpack = PackedSelection(dtype=dtype) for i in range(65): overpack.add("sel_%d", all_true) + with pytest.raises( + ValueError, + match="Dask arrays are not supported, please convert them to dask_awkward.Array by using dask_awkward.from_dask_array()", + ): + sel.add("dask_array", daskarray) + -def test_packed_selection_dak(): +def test_packed_selection_nminusone(): import awkward as ak - import dask_awkward as dak from coffea.analysis_tools import PackedSelection - sel = PackedSelection() + events = eagerevents - shape = (10,) - all_true = dak.from_awkward( - ak.Array(np.full(shape=shape, fill_value=True, dtype=bool)), 1 - ) - all_false = dak.from_awkward( - ak.Array(np.full(shape=shape, fill_value=False, dtype=bool)), 1 + selection = PackedSelection() + + twoelectron = ak.num(events.Electron) == 2 + nomuon = ak.num(events.Muon) == 0 + leadpt20 = ak.any(events.Electron.pt >= 20.0, axis=1) | ak.any( + events.Muon.pt >= 20.0, axis=1 ) - fizz = dak.from_awkward(ak.Array(np.arange(shape[0]) % 3 == 0), 1) - buzz = dak.from_awkward(ak.Array(np.arange(shape[0]) % 5 == 0), 1) - ones = dak.from_awkward(ak.Array(np.ones(shape=shape, dtype=np.uint64)), 1) - wrong_shape = ones = dak.from_awkward( - ak.Array(np.ones(shape=(shape[0] - 5,), dtype=bool)), 1 + + selection.add_multiple( + { + "twoElectron": twoelectron, + "noMuon": nomuon, + "leadPt20": leadpt20, + } ) - sel.add("all_true", all_true) - sel.add("all_false", all_false) - sel.add("fizz", fizz) - sel.add("buzz", buzz) + assert selection.names == ["twoElectron", "noMuon", "leadPt20"] - assert np.all( - sel.require(all_true=True, all_false=False).compute() == all_true.compute() - ) - # allow truthy values - assert np.all(sel.require(all_true=1, 
all_false=0).compute() == all_true.compute()) - assert np.all(sel.all("all_true", "all_false").compute() == all_false.compute()) - assert np.all(sel.any("all_true", "all_false").compute() == all_true.compute()) - assert np.all( - sel.all("fizz", "buzz").compute() - == np.array( - [True, False, False, False, False, False, False, False, False, False] + with pytest.raises( + ValueError, + match="All arguments must be strings that refer to the names of existing selections", + ): + selection.nminusone("twoElectron", "nonexistent") + nminusone = selection.nminusone("twoElectron", "noMuon", "leadPt20") + + labels, nev, masks = nminusone.result() + + assert labels == ["initial", "N - twoElectron", "N - noMuon", "N - leadPt20", "N"] + + assert nev == [ + len(events), + len(events[nomuon & leadpt20]), + len(events[twoelectron & leadpt20]), + len(events[twoelectron & nomuon]), + len(events[twoelectron & nomuon & leadpt20]), + ] + + for mask, truth in zip( + masks, + [ + nomuon & leadpt20, + twoelectron & leadpt20, + twoelectron & nomuon, + twoelectron & nomuon & leadpt20, + ], + ): + assert np.all(mask == truth) + + nminusone.to_npz("nminusone.npz", compressed=False) + with np.load("nminusone.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nev"] == nev) + assert np.all(file["masks"] == masks) + os.remove("nminusone.npz") + + nminusone.to_npz("nminusone.npz", compressed=True) + with np.load("nminusone.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nev"] == nev) + assert np.all(file["masks"] == masks) + os.remove("nminusone.npz") + + h, hlabels = nminusone.yieldhist() + + assert hlabels == ["initial", "N - twoElectron", "N - noMuon", "N - leadPt20", "N"] + + assert np.all(h.axes["N-1"].edges == np.arange(0, 6)) + + assert np.all(h.counts() == nev) + + with pytest.raises(ValueError): + nminusone.plot_vars( + {"Ept": events.Electron.pt, "Ephi": events.Electron.phi[:20]} ) + hs, hslabels = nminusone.plot_vars( + 
{"Ept": events.Electron.pt, "Ephi": events.Electron.phi} ) - assert np.all( - sel.any("fizz", "buzz").compute() - == np.array([True, False, False, True, False, True, True, False, False, True]) + + assert hslabels == ["initial", "N - twoElectron", "N - noMuon", "N - leadPt20", "N"] + + for h, array in zip(hs, [events.Electron.pt, events.Electron.phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon & leadpt20, + twoelectron & leadpt20, + twoelectron & nomuon, + twoelectron & nomuon & leadpt20, + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(ak.flatten(array[truth]), bins=edges) + assert np.all(counts == c) + + +def test_packed_selection_cutflow(): + import awkward as ak + + from coffea.analysis_tools import PackedSelection + + events = eagerevents + + selection = PackedSelection() + + twoelectron = ak.num(events.Electron) == 2 + nomuon = ak.num(events.Muon) == 0 + leadpt20 = ak.any(events.Electron.pt >= 20.0, axis=1) | ak.any( + events.Muon.pt >= 20.0, axis=1 ) - with pytest.raises(ValueError): - sel.add("wrong_shape", wrong_shape) + selection.add_multiple( + { + "twoElectron": twoelectron, + "noMuon": nomuon, + "leadPt20": leadpt20, + } + ) + + assert selection.names == ["twoElectron", "noMuon", "leadPt20"] + + with pytest.raises( + ValueError, + match="All arguments must be strings that refer to the names of existing selections", + ): + selection.cutflow("twoElectron", "nonexistent") + cutflow = selection.cutflow("noMuon", "twoElectron", "leadPt20") + + labels, nevonecut, nevcutflow, masksonecut, maskscutflow = cutflow.result() + + assert labels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + assert nevonecut == [ + len(events), + len(events[nomuon]), + len(events[twoelectron]), + len(events[leadpt20]), + ] + + assert nevcutflow == [ + len(events), + len(events[nomuon]), + len(events[nomuon & twoelectron]), + len(events[nomuon & twoelectron & leadpt20]), + ] + + for mask, truth in zip(masksonecut, 
[nomuon, twoelectron, leadpt20]): + assert np.all(mask == truth) + + for mask, truth in zip( + maskscutflow, [nomuon, nomuon & twoelectron, nomuon & twoelectron & leadpt20] + ): + assert np.all(mask == truth) + + cutflow.to_npz("cutflow.npz", compressed=False) + with np.load("cutflow.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nevonecut"] == nevonecut) + assert np.all(file["nevcutflow"] == nevcutflow) + assert np.all(file["masksonecut"] == masksonecut) + assert np.all(file["maskscutflow"] == maskscutflow) + os.remove("cutflow.npz") + + cutflow.to_npz("cutflow.npz", compressed=True) + with np.load("cutflow.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nevonecut"] == nevonecut) + assert np.all(file["nevcutflow"] == nevcutflow) + assert np.all(file["masksonecut"] == masksonecut) + assert np.all(file["maskscutflow"] == maskscutflow) + os.remove("cutflow.npz") + + honecut, hcutflow, hlabels = cutflow.yieldhist() + + assert hlabels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + assert np.all(honecut.axes["onecut"].edges == np.arange(0, 5)) + assert np.all(hcutflow.axes["cutflow"].edges == np.arange(0, 5)) + + assert np.all(honecut.counts() == nevonecut) + assert np.all(hcutflow.counts() == nevcutflow) with pytest.raises(ValueError): - sel.add("ones", ones) + cutflow.plot_vars({"Ept": events.Electron.pt, "Ephi": events.Electron.phi[:20]}) + honecuts, hcutflows, hslabels = cutflow.plot_vars( + {"ept": events.Electron.pt, "ephi": events.Electron.phi} + ) - with pytest.raises(RuntimeError): - overpack = PackedSelection() - for i in range(65): - overpack.add("sel_%d", all_true) + assert hslabels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + for h, array in zip(honecuts, [events.Electron.pt, events.Electron.phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [np.ones(40, dtype=bool), nomuon, twoelectron, leadpt20] + ): + counts = h[:, i].counts() + c, e = 
np.histogram(ak.flatten(array[truth]), bins=edges) + assert np.all(counts == c) + + for h, array in zip(hcutflows, [events.Electron.pt, events.Electron.phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon, + nomuon & twoelectron, + nomuon & twoelectron & leadpt20, + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(ak.flatten(array[truth]), bins=edges) + assert np.all(counts == c) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +@pytest.mark.parametrize("dtype", ["uint16", "uint32", "uint64"]) +def test_packed_selection_basic_dak(optimization_enabled, dtype): + import awkward as ak + import dask + import dask.array as da + import dask_awkward as dak + + from coffea.analysis_tools import PackedSelection + + sel = PackedSelection(dtype=dtype) + + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + shape = (10,) + all_true = dak.from_awkward( + ak.Array(np.full(shape=shape, fill_value=True, dtype=bool)), 1 + ) + all_false = dak.from_awkward( + ak.Array(np.full(shape=shape, fill_value=False, dtype=bool)), 1 + ) + fizz = dak.from_awkward(ak.Array(np.arange(shape[0]) % 3 == 0), 1) + buzz = dak.from_awkward(ak.Array(np.arange(shape[0]) % 5 == 0), 1) + ones = dak.from_awkward(ak.Array(np.ones(shape=shape, dtype=np.uint64)), 1) + wrong_shape = dak.from_awkward( + ak.Array(np.ones(shape=(shape[0] - 5,), dtype=bool)), 1 + ) + wrong_type = np.arange(shape[0]) % 3 == 0 + daskarray = da.arange(shape[0]) % 3 == 0 + + with pytest.warns( + UserWarning, + match="PackedSelection hasn't been initialized with a boolean array yet!", + ): + assert sel.delayed_mode is False + + sel.add("fizz", fizz) + sel.add("buzz", buzz) + + assert np.all( + sel.all().compute() + == np.array( + [True, False, False, False, False, False, False, False, False, False] + ) + ) + assert np.all( + sel.allfalse().compute() + == np.array( + [False, True, True, False, True, False, False, True, True, False] + ) 
+ ) + + sel.add_multiple({"all_true": all_true, "all_false": all_false}) + + assert sel.delayed_mode is True + with pytest.raises( + ValueError, + match="New selection 'wrong_type' is not delayed while PackedSelection is!", + ): + sel.add("wrong_type", wrong_type) + + assert np.all( + sel.require(all_true=True, all_false=False).compute() == all_true.compute() + ) + # allow truthy values + assert np.all( + sel.require(all_true=1, all_false=0).compute() == all_true.compute() + ) + assert np.all(sel.all("all_true", "all_false").compute() == all_false.compute()) + assert np.all(sel.any("all_true", "all_false").compute() == all_true.compute()) + assert np.all( + sel.all("fizz", "buzz").compute() + == np.array( + [True, False, False, False, False, False, False, False, False, False] + ) + ) + assert np.all( + sel.allfalse("fizz", "buzz").compute() + == np.array( + [False, True, True, False, True, False, False, True, True, False] + ) + ) + assert np.all( + sel.any("fizz", "buzz").compute() + == np.array( + [True, False, False, True, False, True, True, False, False, True] + ) + ) + + with pytest.raises( + ValueError, + match="New selection 'wrong_shape' has a different partition structure than existing selections", + ): + sel.add("wrong_shape", wrong_shape) + + with pytest.raises( + ValueError, match="Expected a boolean array, received uint64" + ): + sel.add("ones", ones) + + with pytest.raises(RuntimeError): + overpack = PackedSelection(dtype=dtype) + for i in range(65): + overpack.add("sel_%d", all_true) + + with pytest.raises( + ValueError, + match="Dask arrays are not supported, please convert them to dask_awkward.Array by using dask_awkward.from_dask_array()", + ): + sel.add("dask_array", daskarray) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_packed_selection_nminusone_dak(optimization_enabled): + import dask + import dask_awkward as dak + + from coffea.analysis_tools import PackedSelection + + events = dakevents + + selection = 
PackedSelection() + + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + twoelectron = dak.num(events.Electron) == 2 + nomuon = dak.num(events.Muon) == 0 + leadpt20 = dak.any(events.Electron.pt >= 20.0, axis=1) | dak.any( + events.Muon.pt >= 20.0, axis=1 + ) + + selection.add_multiple( + { + "twoElectron": twoelectron, + "noMuon": nomuon, + "leadPt20": leadpt20, + } + ) + + assert selection.names == ["twoElectron", "noMuon", "leadPt20"] + + with pytest.raises( + ValueError, + match="All arguments must be strings that refer to the names of existing selections", + ): + selection.nminusone("twoElectron", "nonexistent") + nminusone = selection.nminusone("twoElectron", "noMuon", "leadPt20") + + labels, nev, masks = nminusone.result() + + assert labels == [ + "initial", + "N - twoElectron", + "N - noMuon", + "N - leadPt20", + "N", + ] + + assert list(dask.compute(*nev)) == [ + len(events), + len(events[nomuon & leadpt20]), + len(events[twoelectron & leadpt20]), + len(events[twoelectron & nomuon]), + len(events[twoelectron & nomuon & leadpt20]), + ] + + for mask, truth in zip( + masks, + [ + nomuon & leadpt20, + twoelectron & leadpt20, + twoelectron & nomuon, + twoelectron & nomuon & leadpt20, + ], + ): + assert np.all(mask.compute() == truth.compute()) + + nminusone.to_npz("nminusone.npz", compressed=False) + with np.load("nminusone.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nev"] == list(dask.compute(*nev))) + assert np.all(file["masks"] == list(dask.compute(*masks))) + os.remove("nminusone.npz") + + nminusone.to_npz("nminusone.npz", compressed=True) + with np.load("nminusone.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nev"] == list(dask.compute(*nev))) + assert np.all(file["masks"] == list(dask.compute(*masks))) + os.remove("nminusone.npz") + + h, hlabels = dask.compute(*nminusone.yieldhist()) + + assert hlabels == [ + "initial", + "N - twoElectron", + "N - noMuon", + "N 
- leadPt20", + "N", + ] + + assert np.all(h.axes["N-1"].edges == np.arange(0, 6)) + + assert np.all(h.counts() == list(dask.compute(*nev))) + + # with pytest.raises(IncompatiblePartitions): + # nminusone.plot_vars( + # {"Ept": events.Electron.pt, "Ephi": events[:20].Electron.phi} + # ) + hs, hslabels = dask.compute( + *nminusone.plot_vars( + {"Ept": events.Electron.pt, "Ephi": events.Electron.phi} + ) + ) + + assert hslabels == [ + "initial", + "N - twoElectron", + "N - noMuon", + "N - leadPt20", + "N", + ] + + for h, array in zip(hs, [events.Electron.pt, events.Electron.phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon.compute() & leadpt20.compute(), + twoelectron.compute() & leadpt20.compute(), + twoelectron.compute() & nomuon.compute(), + twoelectron.compute() & nomuon.compute() & leadpt20.compute(), + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(dak.flatten(array[truth]).compute(), bins=edges) + assert np.all(counts == c) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_packed_selection_cutflow_dak(optimization_enabled): + import dask + import dask_awkward as dak + + from coffea.analysis_tools import PackedSelection + + events = dakevents + + selection = PackedSelection() + + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + twoelectron = dak.num(events.Electron) == 2 + nomuon = dak.num(events.Muon) == 0 + leadpt20 = dak.any(events.Electron.pt >= 20.0, axis=1) | dak.any( + events.Muon.pt >= 20.0, axis=1 + ) + + selection.add_multiple( + { + "twoElectron": twoelectron, + "noMuon": nomuon, + "leadPt20": leadpt20, + } + ) + + assert selection.names == ["twoElectron", "noMuon", "leadPt20"] + + with pytest.raises( + ValueError, + match="All arguments must be strings that refer to the names of existing selections", + ): + selection.cutflow("twoElectron", "nonexistent") + cutflow = selection.cutflow("noMuon", "twoElectron", "leadPt20") + + 
labels, nevonecut, nevcutflow, masksonecut, maskscutflow = cutflow.result() + + assert labels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + assert list(dask.compute(*nevonecut)) == [ + len(events), + len(events[nomuon]), + len(events[twoelectron]), + len(events[leadpt20]), + ] + + assert list(dask.compute(*nevcutflow)) == [ + len(events), + len(events[nomuon]), + len(events[nomuon & twoelectron]), + len(events[nomuon & twoelectron & leadpt20]), + ] + + for mask, truth in zip(masksonecut, [nomuon, twoelectron, leadpt20]): + assert np.all(mask.compute() == truth.compute()) + + for mask, truth in zip( + maskscutflow, + [nomuon, nomuon & twoelectron, nomuon & twoelectron & leadpt20], + ): + assert np.all(mask.compute() == truth.compute()) + + cutflow.to_npz("cutflow.npz", compressed=False) + with np.load("cutflow.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) + assert np.all(file["nevcutflow"] == list(dask.compute(*nevcutflow))) + assert np.all(file["masksonecut"] == list(dask.compute(*masksonecut))) + assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) + os.remove("cutflow.npz") + + cutflow.to_npz("cutflow.npz", compressed=True) + with np.load("cutflow.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) + assert np.all(file["nevcutflow"] == list(dask.compute(*nevcutflow))) + assert np.all(file["masksonecut"] == list(dask.compute(*masksonecut))) + assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) + os.remove("cutflow.npz") + + honecut, hcutflow, hlabels = dask.compute(*cutflow.yieldhist()) + + assert hlabels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + assert np.all(honecut.axes["onecut"].edges == np.arange(0, 5)) + assert np.all(hcutflow.axes["cutflow"].edges == np.arange(0, 5)) + + assert np.all(honecut.counts() == list(dask.compute(*nevonecut))) + 
assert np.all(hcutflow.counts() == list(dask.compute(*nevcutflow))) + + # with pytest.raises(IncompatiblePartitions): + # cutflow.plot_vars( + # {"Ept": events.Electron.pt, "Ephi": events[:20].Electron.phi} + # ) + honecuts, hcutflows, hslabels = dask.compute( + *cutflow.plot_vars({"ept": events.Electron.pt, "ephi": events.Electron.phi}) + ) + + assert hslabels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + for h, array in zip(honecuts, [events.Electron.pt, events.Electron.phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon.compute(), + twoelectron.compute(), + leadpt20.compute(), + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(dak.flatten(array[truth]).compute(), bins=edges) + assert np.all(counts == c) + + for h, array in zip(hcutflows, [events.Electron.pt, events.Electron.phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon.compute(), + (nomuon & twoelectron).compute(), + (nomuon & twoelectron & leadpt20).compute(), + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(dak.flatten(array[truth]).compute(), bins=edges) + assert np.all(counts == c) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_packed_selection_nminusone_dak_uproot_only(optimization_enabled): + import dask + import dask_awkward as dak + + from coffea.analysis_tools import PackedSelection + + events = uprootevents + + selection = PackedSelection() + + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + twoelectron = dak.num(events.Electron_pt) == 2 + nomuon = dak.num(events.Muon_pt) == 0 + leadpt20 = dak.any(events.Electron_pt >= 20.0, axis=1) | dak.any( + events.Muon_pt >= 20.0, axis=1 + ) + + selection.add_multiple( + { + "twoElectron": twoelectron, + "noMuon": nomuon, + "leadPt20": leadpt20, + } + ) + + assert selection.names == ["twoElectron", "noMuon", "leadPt20"] + + with pytest.raises( + ValueError, + match="All 
arguments must be strings that refer to the names of existing selections", + ): + selection.nminusone("twoElectron", "nonexistent") + nminusone = selection.nminusone("twoElectron", "noMuon", "leadPt20") + + labels, nev, masks = nminusone.result() + + assert labels == [ + "initial", + "N - twoElectron", + "N - noMuon", + "N - leadPt20", + "N", + ] + + assert list(dask.compute(*nev)) == [ + len(events), + len(events[nomuon & leadpt20]), + len(events[twoelectron & leadpt20]), + len(events[twoelectron & nomuon]), + len(events[twoelectron & nomuon & leadpt20]), + ] + + for mask, truth in zip( + masks, + [ + nomuon & leadpt20, + twoelectron & leadpt20, + twoelectron & nomuon, + twoelectron & nomuon & leadpt20, + ], + ): + assert np.all(mask.compute() == truth.compute()) + + nminusone.to_npz("nminusone.npz", compressed=False) + with np.load("nminusone.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nev"] == list(dask.compute(*nev))) + assert np.all(file["masks"] == list(dask.compute(*masks))) + os.remove("nminusone.npz") + + nminusone.to_npz("nminusone.npz", compressed=True) + with np.load("nminusone.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nev"] == list(dask.compute(*nev))) + assert np.all(file["masks"] == list(dask.compute(*masks))) + os.remove("nminusone.npz") + + h, hlabels = dask.compute(*nminusone.yieldhist()) + + assert hlabels == [ + "initial", + "N - twoElectron", + "N - noMuon", + "N - leadPt20", + "N", + ] + + assert np.all(h.axes["N-1"].edges == np.arange(0, 6)) + + assert np.all(h.counts() == list(dask.compute(*nev))) + + # with pytest.raises(IncompatiblePartitions): + # nminusone.plot_vars( + # {"Ept": events.Electron_pt, "Ephi": events[:20].Electron_phi} + # ) + hs, hslabels = dask.compute( + *nminusone.plot_vars( + {"Ept": events.Electron_pt, "Ephi": events.Electron_phi} + ) + ) + + assert hslabels == [ + "initial", + "N - twoElectron", + "N - noMuon", + "N - leadPt20", + "N", + ] + + for 
h, array in zip(hs, [events.Electron_pt, events.Electron_phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon.compute() & leadpt20.compute(), + twoelectron.compute() & leadpt20.compute(), + twoelectron.compute() & nomuon.compute(), + twoelectron.compute() & nomuon.compute() & leadpt20.compute(), + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(dak.flatten(array[truth]).compute(), bins=edges) + assert np.all(counts == c) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled): + import dask + import dask_awkward as dak + + from coffea.analysis_tools import PackedSelection + + events = uprootevents + + selection = PackedSelection() + + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + twoelectron = dak.num(events.Electron_pt) == 2 + nomuon = dak.num(events.Muon_pt) == 0 + leadpt20 = dak.any(events.Electron_pt >= 20.0, axis=1) | dak.any( + events.Muon_pt >= 20.0, axis=1 + ) + + selection.add_multiple( + { + "twoElectron": twoelectron, + "noMuon": nomuon, + "leadPt20": leadpt20, + } + ) + + assert selection.names == ["twoElectron", "noMuon", "leadPt20"] + + with pytest.raises( + ValueError, + match="All arguments must be strings that refer to the names of existing selections", + ): + selection.cutflow("twoElectron", "nonexistent") + cutflow = selection.cutflow("noMuon", "twoElectron", "leadPt20") + + labels, nevonecut, nevcutflow, masksonecut, maskscutflow = cutflow.result() + + assert labels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + assert list(dask.compute(*nevonecut)) == [ + len(events), + len(events[nomuon]), + len(events[twoelectron]), + len(events[leadpt20]), + ] + + assert list(dask.compute(*nevcutflow)) == [ + len(events), + len(events[nomuon]), + len(events[nomuon & twoelectron]), + len(events[nomuon & twoelectron & leadpt20]), + ] + + for mask, truth in zip(masksonecut, 
[nomuon, twoelectron, leadpt20]): + assert np.all(mask.compute() == truth.compute()) + + for mask, truth in zip( + maskscutflow, + [nomuon, nomuon & twoelectron, nomuon & twoelectron & leadpt20], + ): + assert np.all(mask.compute() == truth.compute()) + + cutflow.to_npz("cutflow.npz", compressed=False) + with np.load("cutflow.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) + assert np.all(file["nevcutflow"] == list(dask.compute(*nevcutflow))) + assert np.all(file["masksonecut"] == list(dask.compute(*masksonecut))) + assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) + os.remove("cutflow.npz") + + cutflow.to_npz("cutflow.npz", compressed=True) + with np.load("cutflow.npz") as file: + assert np.all(file["labels"] == labels) + assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) + assert np.all(file["nevcutflow"] == list(dask.compute(*nevcutflow))) + assert np.all(file["masksonecut"] == list(dask.compute(*masksonecut))) + assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) + os.remove("cutflow.npz") + + honecut, hcutflow, hlabels = dask.compute(*cutflow.yieldhist()) + + assert hlabels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + assert np.all(honecut.axes["onecut"].edges == np.arange(0, 5)) + assert np.all(hcutflow.axes["cutflow"].edges == np.arange(0, 5)) + + assert np.all(honecut.counts() == list(dask.compute(*nevonecut))) + assert np.all(hcutflow.counts() == list(dask.compute(*nevcutflow))) + + # with pytest.raises(IncompatiblePartitions): + # cutflow.plot_vars( + # {"Ept": events.Electron_pt, "Ephi": events[:20].Electron_phi} + # ) + honecuts, hcutflows, hslabels = dask.compute( + *cutflow.plot_vars({"ept": events.Electron_pt, "ephi": events.Electron_phi}) + ) + + assert hslabels == ["initial", "noMuon", "twoElectron", "leadPt20"] + + for h, array in zip(honecuts, [events.Electron_pt, events.Electron_phi]): + edges = 
h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon.compute(), + twoelectron.compute(), + leadpt20.compute(), + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(dak.flatten(array[truth]).compute(), bins=edges) + assert np.all(counts == c) + + for h, array in zip(hcutflows, [events.Electron_pt, events.Electron_phi]): + edges = h.axes[0].edges + for i, truth in enumerate( + [ + np.ones(40, dtype=bool), + nomuon.compute(), + (nomuon & twoelectron).compute(), + (nomuon & twoelectron & leadpt20).compute(), + ] + ): + counts = h[:, i].counts() + c, e = np.histogram(dak.flatten(array[truth]).compute(), bins=edges) + assert np.all(counts == c) diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index 96c5002df..a7ef91385 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -1,8 +1,10 @@ import time import awkward as ak +import dask import dask_awkward as dak import pyinstrument +import pytest from dummy_distributions import dummy_jagged_eta_pt from coffea.util import numpy as np @@ -38,869 +40,916 @@ def jetmet_evaluator(): evaluator = jetmet_evaluator() -def test_factorized_jet_corrector(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_factorized_jet_corrector(optimization_enabled): from coffea.jetmet_tools import FactorizedJetCorrector - counts, test_eta, test_pt = dummy_jagged_eta_pt() - test_Rho = np.full_like(test_eta, 100.0) - test_A = np.full_like(test_eta, 5.0) - - # Check that the FactorizedJetCorrector is functional - jec_names = [ - "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi", - "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi", - "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi", - "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi", - ] - corrector = FactorizedJetCorrector(**{name: evaluator[name] for name in jec_names}) + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() + 
test_Rho = np.full_like(test_eta, 100.0) + test_A = np.full_like(test_eta, 5.0) + + # Check that the FactorizedJetCorrector is functional + jec_names = [ + "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi", + "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi", + "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi", + "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi", + ] + corrector = FactorizedJetCorrector( + **{name: evaluator[name] for name in jec_names} + ) - print(corrector) + print(corrector) - pt_copy = np.copy(test_pt) + pt_copy = np.copy(test_pt) - # Check that the corrector can be evaluated for flattened arrays - corrs = corrector.getCorrection( - JetEta=test_eta, Rho=test_Rho, JetPt=test_pt, JetA=test_A - ) + # Check that the corrector can be evaluated for flattened arrays + corrs = corrector.getCorrection( + JetEta=test_eta, Rho=test_Rho, JetPt=test_pt, JetA=test_A + ) - assert (np.abs(pt_copy - test_pt) < 1e-6).all() + assert (np.abs(pt_copy - test_pt) < 1e-6).all() - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) - test_Rho_jag = ak.unflatten(test_Rho, counts) - test_A_jag = ak.unflatten(test_A, counts) + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = ak.unflatten(test_eta, counts) + test_Rho_jag = ak.unflatten(test_Rho, counts) + test_A_jag = ak.unflatten(test_A, counts) - test_pt_dak = dak.from_awkward(test_pt_jag, 1) - test_eta_dak = dak.from_awkward(test_eta_jag, 1) - test_Rho_dak = dak.from_awkward(test_Rho_jag, 1) - test_A_dak = dak.from_awkward(test_A_jag, 1) + test_pt_dak = dak.from_awkward(test_pt_jag, 1) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) + test_Rho_dak = dak.from_awkward(test_Rho_jag, 1) + test_A_dak = dak.from_awkward(test_A_jag, 1) - # Check that the corrector can be evaluated for jagged arrays - corrs_jag = corrector.getCorrection( - JetEta=test_eta_jag, - Rho=test_Rho_jag, - JetPt=test_pt_jag, - JetA=test_A_jag, - ) + # Check that the corrector can be evaluated for jagged 
arrays + corrs_jag = corrector.getCorrection( + JetEta=test_eta_jag, + Rho=test_Rho_jag, + JetPt=test_pt_jag, + JetA=test_A_jag, + ) - print(corrs_jag) + print(corrs_jag) - corrs_dak = corrector.getCorrection( - JetEta=test_eta_dak, - Rho=test_Rho_dak, - JetPt=test_pt_dak, - JetA=test_A_dak, - ) + corrs_dak = corrector.getCorrection( + JetEta=test_eta_dak, + Rho=test_Rho_dak, + JetPt=test_pt_dak, + JetA=test_A_dak, + ) - print(corrs_dak) - print(corrs_dak.dask) - - assert ak.all(np.abs(pt_copy - ak.flatten(test_pt_jag)) < 1e-6) - assert ak.all(np.abs(corrs - ak.flatten(corrs_jag)) < 1e-6) - assert ak.all(np.abs(corrs - ak.flatten(corrs_dak.compute())) < 1e-6) - - # Check that the corrector returns the correct answers for each level of correction - # Use a subset of the values so that we can check the corrections by hand - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - test_Rho_jag = test_Rho_jag[0:3] - test_A_jag = test_A_jag[0:3] - counts = counts[0:3] - test_pt_dak = test_pt_dak[0:3] - test_eta_dak = test_eta_dak[0:3] - test_Rho_dak = test_Rho_dak[0:3] - test_A_dak = test_A_dak[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag) - print("eta:", test_eta_jag) - print("rho:", test_Rho_jag) - print("area:", test_A_jag, "\n") - - # Start by checking the L1 corrections - corrs_L1_jag_ref = ak.full_like(test_pt_jag, 1.0) - corrector = FactorizedJetCorrector( - **{name: evaluator[name] for name in jec_names[0:1]} - ) - corrs_L1_jag = corrector.getCorrection( - JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag - ) + print(corrs_dak) + print(corrs_dak.dask) + + assert ak.all(np.abs(pt_copy - ak.flatten(test_pt_jag)) < 1e-6) + assert ak.all(np.abs(corrs - ak.flatten(corrs_jag)) < 1e-6) + assert ak.all(np.abs(corrs - ak.flatten(corrs_dak.compute())) < 1e-6) + + # Check that the corrector returns the correct answers for each level of correction + # Use a subset of the values so that we can check the corrections by hand + 
test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + test_Rho_jag = test_Rho_jag[0:3] + test_A_jag = test_A_jag[0:3] + counts = counts[0:3] + test_pt_dak = test_pt_dak[0:3] + test_eta_dak = test_eta_dak[0:3] + test_Rho_dak = test_Rho_dak[0:3] + test_A_dak = test_A_dak[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag) + print("eta:", test_eta_jag) + print("rho:", test_Rho_jag) + print("area:", test_A_jag, "\n") + + # Start by checking the L1 corrections + corrs_L1_jag_ref = ak.full_like(test_pt_jag, 1.0) + corrector = FactorizedJetCorrector( + **{name: evaluator[name] for name in jec_names[0:1]} + ) + corrs_L1_jag = corrector.getCorrection( + JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag + ) - corrs_L1_dak = corrector.getCorrection( - JetEta=test_eta_dak, Rho=test_Rho_dak, JetPt=test_pt_dak, JetA=test_A_dak - ) + corrs_L1_dak = corrector.getCorrection( + JetEta=test_eta_dak, Rho=test_Rho_dak, JetPt=test_pt_dak, JetA=test_A_dak + ) - print(corrs_L1_dak) - print(corrs_L1_dak.dask) + print(corrs_L1_dak) + print(corrs_L1_dak.dask) - print("Reference L1 corrections:", corrs_L1_jag_ref) - print("Calculated L1 corrections:", corrs_L1_jag) - assert ak.all( - np.abs(ak.flatten(corrs_L1_jag_ref) - ak.flatten(corrs_L1_jag)) < 1e-6 - ) - assert ak.all( - np.abs(ak.flatten(corrs_L1_jag_ref) - ak.flatten(corrs_L1_dak.compute())) < 1e-6 - ) + print("Reference L1 corrections:", corrs_L1_jag_ref) + print("Calculated L1 corrections:", corrs_L1_jag) + assert ak.all( + np.abs(ak.flatten(corrs_L1_jag_ref) - ak.flatten(corrs_L1_jag)) < 1e-6 + ) + assert ak.all( + np.abs(ak.flatten(corrs_L1_jag_ref) - ak.flatten(corrs_L1_dak.compute())) + < 1e-6 + ) - # Apply the L1 corrections and save the result - test_ptL1_jag = test_pt_jag * corrs_L1_jag - print("L1 corrected pT values:", test_ptL1_jag, "\n") - assert ak.all(np.abs(ak.flatten(test_pt_jag) - ak.flatten(test_ptL1_jag)) < 1e-6) + # Apply the L1 corrections and save the result + 
test_ptL1_jag = test_pt_jag * corrs_L1_jag + print("L1 corrected pT values:", test_ptL1_jag, "\n") + assert ak.all( + np.abs(ak.flatten(test_pt_jag) - ak.flatten(test_ptL1_jag)) < 1e-6 + ) - test_ptL1_dak = test_pt_dak * corrs_L1_dak - print(test_ptL1_dak) - assert ak.all( - np.abs(ak.flatten(test_pt_jag) - ak.flatten(test_ptL1_dak.compute())) < 1e-6 - ) + test_ptL1_dak = test_pt_dak * corrs_L1_dak + print(test_ptL1_dak) + assert ak.all( + np.abs(ak.flatten(test_pt_jag) - ak.flatten(test_ptL1_dak.compute())) < 1e-6 + ) - # Check the L2 corrections on a subset of jets - # Look up the parameters for the L2 corrections by hand and calculate the corrections - # [(1.37906,35.8534,-0.00829227,7.96644e-05,5.18988e-06), - # (1.38034,17.9841,-0.00729638,-0.000127141,5.70889e-05), - # (1.74466,18.6372,-0.0367036,0.00310864,-0.000277062), - # (1.4759,24.8882,-0.0155333,0.0020836,-0.000198039), - # (1.14606,36.4215,-0.00174801,-1.76393e-05,1.91863e-06), - # (0.999657,4.02981,1.06597,-0.619679,-0.0494)], - # [(1.54524,23.9023,-0.0162807,0.000665243,-4.66608e-06), - # (1.48431,8.68725,0.00642424,0.0252104,-0.0335696)]]) - corrs_L2_jag_ref = ak.unflatten( - np.array( - [ - 1.37038741364, - 1.37710384514, - 1.65148641108, - 1.46840446827, - 1.1328319784, - 1.0, - 1.50762056349, - 1.48719866989, - ] - ), - counts, - ) - corrector = FactorizedJetCorrector( - **{name: evaluator[name] for name in jec_names[1:2]} - ) - corrs_L2_jag = corrector.getCorrection( - JetEta=test_eta_jag, JetPt=corrs_L1_jag * test_pt_jag - ) - corrs_L2_dak = corrector.getCorrection( - JetEta=test_eta_dak, JetPt=corrs_L1_dak * test_pt_dak - ) - print("Reference L2 corrections:", corrs_L2_jag_ref.tolist()) - print("Calculated L2 corrections:", corrs_L2_jag.tolist()) - assert ak.all( - np.abs(ak.flatten(corrs_L2_jag_ref) - ak.flatten(corrs_L2_jag)) < 1e-6 - ) - assert ak.all( - np.abs(ak.flatten(corrs_L2_jag_ref) - ak.flatten(corrs_L2_dak.compute())) < 1e-6 - ) + # Check the L2 corrections on a subset of jets + # 
Look up the parameters for the L2 corrections by hand and calculate the corrections + # [(1.37906,35.8534,-0.00829227,7.96644e-05,5.18988e-06), + # (1.38034,17.9841,-0.00729638,-0.000127141,5.70889e-05), + # (1.74466,18.6372,-0.0367036,0.00310864,-0.000277062), + # (1.4759,24.8882,-0.0155333,0.0020836,-0.000198039), + # (1.14606,36.4215,-0.00174801,-1.76393e-05,1.91863e-06), + # (0.999657,4.02981,1.06597,-0.619679,-0.0494)], + # [(1.54524,23.9023,-0.0162807,0.000665243,-4.66608e-06), + # (1.48431,8.68725,0.00642424,0.0252104,-0.0335696)]]) + corrs_L2_jag_ref = ak.unflatten( + np.array( + [ + 1.37038741364, + 1.37710384514, + 1.65148641108, + 1.46840446827, + 1.1328319784, + 1.0, + 1.50762056349, + 1.48719866989, + ] + ), + counts, + ) + corrector = FactorizedJetCorrector( + **{name: evaluator[name] for name in jec_names[1:2]} + ) + corrs_L2_jag = corrector.getCorrection( + JetEta=test_eta_jag, JetPt=corrs_L1_jag * test_pt_jag + ) + corrs_L2_dak = corrector.getCorrection( + JetEta=test_eta_dak, JetPt=corrs_L1_dak * test_pt_dak + ) + print("Reference L2 corrections:", corrs_L2_jag_ref.tolist()) + print("Calculated L2 corrections:", corrs_L2_jag.tolist()) + assert ak.all( + np.abs(ak.flatten(corrs_L2_jag_ref) - ak.flatten(corrs_L2_jag)) < 1e-6 + ) + assert ak.all( + np.abs(ak.flatten(corrs_L2_jag_ref) - ak.flatten(corrs_L2_dak.compute())) + < 1e-6 + ) - # Apply the L2 corrections and save the result - test_ptL1L2_jag = test_ptL1_jag * corrs_L2_jag - print("L1L2 corrected pT values:", test_ptL1L2_jag, "\n") - test_ptL1L2_dak = test_ptL1_dak * corrs_L2_dak - print("L1L2 corrected pT values:", test_ptL1L2_dak.compute(), "\n") - print(test_ptL1L2_dak) - print(test_ptL1L2_dak.dask) - - # Apply the L3 corrections and save the result - corrs_L3_jag = ak.full_like(test_pt_jag, 1.0) - test_ptL1L2L3_jag = test_ptL1L2_jag * corrs_L3_jag - print("L1L2L3 corrected pT values:", test_ptL1L2L3_jag, "\n") - - corrs_L3_dak = dak.ones_like(test_pt_dak) - test_ptL1L2L3_dak = 
test_ptL1L2_dak * corrs_L3_dak - print("L1L2L3 corrected pT values:", test_ptL1L2L3_dak.compute(), "\n") - print(test_ptL1L2L3_dak) - print(test_ptL1L2L3_dak.dask) - - # Check that the corrections can be chained together - corrs_L1L2L3_jag_ref = ak.unflatten( - np.array( - [ - 1.37038741364, - 1.37710384514, - 1.65148641108, - 1.46840446827, - 1.1328319784, - 1.0, - 1.50762056349, - 1.48719866989, - ] - ), - counts, - ) - corrector = FactorizedJetCorrector( - **{name: evaluator[name] for name in (jec_names[0:2] + jec_names[3:])} - ) - corrs_L1L2L3_jag = corrector.getCorrection( - JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag - ) + # Apply the L2 corrections and save the result + test_ptL1L2_jag = test_ptL1_jag * corrs_L2_jag + print("L1L2 corrected pT values:", test_ptL1L2_jag, "\n") + test_ptL1L2_dak = test_ptL1_dak * corrs_L2_dak + print("L1L2 corrected pT values:", test_ptL1L2_dak.compute(), "\n") + print(test_ptL1L2_dak) + print(test_ptL1L2_dak.dask) + + # Apply the L3 corrections and save the result + corrs_L3_jag = ak.full_like(test_pt_jag, 1.0) + test_ptL1L2L3_jag = test_ptL1L2_jag * corrs_L3_jag + print("L1L2L3 corrected pT values:", test_ptL1L2L3_jag, "\n") + + corrs_L3_dak = dak.ones_like(test_pt_dak) + test_ptL1L2L3_dak = test_ptL1L2_dak * corrs_L3_dak + print("L1L2L3 corrected pT values:", test_ptL1L2L3_dak.compute(), "\n") + print(test_ptL1L2L3_dak) + print(test_ptL1L2L3_dak.dask) + + # Check that the corrections can be chained together + corrs_L1L2L3_jag_ref = ak.unflatten( + np.array( + [ + 1.37038741364, + 1.37710384514, + 1.65148641108, + 1.46840446827, + 1.1328319784, + 1.0, + 1.50762056349, + 1.48719866989, + ] + ), + counts, + ) + corrector = FactorizedJetCorrector( + **{name: evaluator[name] for name in (jec_names[0:2] + jec_names[3:])} + ) + corrs_L1L2L3_jag = corrector.getCorrection( + JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag + ) - corrs_L1L2L3_dak = corrector.getCorrection( - 
JetEta=test_eta_dak, Rho=test_Rho_dak, JetPt=test_pt_dak, JetA=test_A_dak - ) + corrs_L1L2L3_dak = corrector.getCorrection( + JetEta=test_eta_dak, Rho=test_Rho_dak, JetPt=test_pt_dak, JetA=test_A_dak + ) - print("Reference L1L2L3 corrections:", corrs_L1L2L3_jag_ref) - print("Calculated L1L2L3 corrections:", corrs_L1L2L3_jag) - print("Calculated L1L2L3 corrections:", corrs_L1L2L3_dak.compute()) - assert ak.all( - np.abs(ak.flatten(corrs_L1L2L3_jag_ref) - ak.flatten(corrs_L1L2L3_jag)) < 1e-6 - ) - assert ak.all( - np.abs( - ak.flatten(corrs_L1L2L3_jag_ref) - ak.flatten(corrs_L1L2L3_dak.compute()) + print("Reference L1L2L3 corrections:", corrs_L1L2L3_jag_ref) + print("Calculated L1L2L3 corrections:", corrs_L1L2L3_jag) + print("Calculated L1L2L3 corrections:", corrs_L1L2L3_dak.compute()) + assert ak.all( + np.abs(ak.flatten(corrs_L1L2L3_jag_ref) - ak.flatten(corrs_L1L2L3_jag)) + < 1e-6 + ) + assert ak.all( + np.abs( + ak.flatten(corrs_L1L2L3_jag_ref) + - ak.flatten(corrs_L1L2L3_dak.compute()) + ) + < 1e-6 + ) + print(corrs_L1L2L3_dak) + print(corrs_L1L2L3_dak.dask) + + # Apply the L1L2L3 corrections and save the result + test_ptL1L2L3chain_jag = test_pt_jag * corrs_L1L2L3_jag + print("Chained L1L2L3 corrected pT values:", test_ptL1L2L3chain_jag, "\n") + assert ak.all( + np.abs(ak.flatten(test_ptL1L2L3_jag) - ak.flatten(test_ptL1L2L3chain_jag)) + < 1e-6 ) - < 1e-6 - ) - print(corrs_L1L2L3_dak) - print(corrs_L1L2L3_dak.dask) - - # Apply the L1L2L3 corrections and save the result - test_ptL1L2L3chain_jag = test_pt_jag * corrs_L1L2L3_jag - print("Chained L1L2L3 corrected pT values:", test_ptL1L2L3chain_jag, "\n") - assert ak.all( - np.abs(ak.flatten(test_ptL1L2L3_jag) - ak.flatten(test_ptL1L2L3chain_jag)) - < 1e-6 - ) - test_ptL1L2L3chain_dak = test_pt_dak * corrs_L1L2L3_dak - print("Chained L1L2L3 corrected pT values:", test_ptL1L2L3chain_dak.compute(), "\n") - assert ak.all( - np.abs( - ak.flatten(test_ptL1L2L3_jag) - ak.flatten(test_ptL1L2L3chain_dak.compute()) + 
test_ptL1L2L3chain_dak = test_pt_dak * corrs_L1L2L3_dak + print( + "Chained L1L2L3 corrected pT values:", + test_ptL1L2L3chain_dak.compute(), + "\n", ) - < 1e-6 - ) - print(test_ptL1L2L3chain_dak) - print(test_ptL1L2L3chain_dak.dask) + assert ak.all( + np.abs( + ak.flatten(test_ptL1L2L3_jag) + - ak.flatten(test_ptL1L2L3chain_dak.compute()) + ) + < 1e-6 + ) + print(test_ptL1L2L3chain_dak) + print(test_ptL1L2L3chain_dak.dask) -def test_jet_resolution(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_jet_resolution(optimization_enabled): from coffea.jetmet_tools import JetResolution - counts, test_eta, test_pt = dummy_jagged_eta_pt() - test_Rho = np.full_like(test_eta, 10.0) - - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) - test_Rho_jag = ak.unflatten(test_Rho, counts) + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() + test_Rho = np.full_like(test_eta, 10.0) - test_pt_dak = dak.from_awkward(test_pt_jag, 1) - test_eta_dak = dak.from_awkward(test_eta_jag, 1) - test_Rho_dak = dak.from_awkward(test_Rho_jag, 1) + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = ak.unflatten(test_eta, counts) + test_Rho_jag = ak.unflatten(test_Rho, counts) - jer_names = ["Spring16_25nsV10_MC_PtResolution_AK4PFPuppi"] - reso = JetResolution(**{name: evaluator[name] for name in jer_names}) + test_pt_dak = dak.from_awkward(test_pt_jag, 1) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) + test_Rho_dak = dak.from_awkward(test_Rho_jag, 1) - print(reso) + jer_names = ["Spring16_25nsV10_MC_PtResolution_AK4PFPuppi"] + reso = JetResolution(**{name: evaluator[name] for name in jer_names}) - resos = reso.getResolution(JetEta=test_eta, Rho=test_Rho, JetPt=test_pt) - resos_jag = reso.getResolution( - JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag - ) - resos_dak = reso.getResolution( - JetEta=test_eta_dak, Rho=test_Rho_dak, 
JetPt=test_pt_dak - ) - assert ak.all(np.abs(resos - ak.flatten(resos_jag)) < 1e-6) - assert ak.all(np.abs(resos - ak.flatten(resos_dak.compute())) < 1e-6) - print(resos_dak) - print(resos_dak.dask) - - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - test_Rho_jag = test_Rho_jag[0:3] - test_Rho_jag = ak.concatenate( - [test_Rho_jag[:-1], [ak.concatenate([test_Rho_jag[-1, :-1], 100.0])]] - ) - counts = counts[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag) - print("eta:", test_eta_jag) - print("rho:", test_Rho_jag, "\n") - - resos_jag_ref = ak.unflatten( - np.array( - [ - 0.21974642, - 0.32421591, - 0.33702479, - 0.27420327, - 0.13940689, - 0.48134521, - 0.26564994, - 1.0, - ] - ), - counts, - ) - resos_jag = reso.getResolution( - JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag - ) - print("Reference Resolution (jagged):", resos_jag_ref) - print("Resolution (jagged):", resos_jag) - # NB: 5e-4 tolerance was agreed upon by lgray and aperloff, if the differences get bigger over time - # we need to agree upon how these numbers are evaluated (double/float conversion is kinda random) - assert ak.all(np.abs(ak.flatten(resos_jag_ref) - ak.flatten(resos_jag)) < 5e-4) + print(reso) - -def test_jet_correction_uncertainty(): - from coffea.jetmet_tools import JetCorrectionUncertainty - - counts, test_eta, test_pt = dummy_jagged_eta_pt() - - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) - - test_pt_dak = dak.from_awkward(test_pt_jag, 1) - test_eta_dak = dak.from_awkward(test_eta_jag, 1) - - junc_names = ["Summer16_23Sep2016V3_MC_Uncertainty_AK4PFPuppi"] - junc = JetCorrectionUncertainty(**{name: evaluator[name] for name in junc_names}) - - print(junc) - - juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt) - - juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - - juncs_dak = list(junc.getUncertainty(JetEta=test_eta_dak, JetPt=test_pt_dak)) - - for i, (level, 
corrs) in enumerate(juncs): - assert corrs.shape[0] == test_eta.shape[0] - assert ak.all(corrs == ak.flatten(juncs_jag[i][1])) - assert ak.all(corrs == ak.flatten(juncs_dak[i][1].compute())) - - zipped_dak = dak.zip({k: v for k, v in juncs_dak}) - print(zipped_dak) - print(zipped_dak.dask) - - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - counts = counts[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag.tolist()) - print("eta:", test_eta_jag.tolist(), "\n") - - juncs_jag_ref = ak.unflatten( - np.array( - [ - [1.053504214, 0.946495786], - [1.033343349, 0.966656651], - [1.065159157, 0.934840843], - [1.033140127, 0.966859873], - [1.016858652, 0.983141348], - [1.130199999, 0.869800001], - [1.039968468, 0.960031532], - [1.033100002, 0.966899998], - ] - ), - counts, - ) - juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - - for i, (level, corrs) in enumerate(juncs_jag): - print("Index:", i) - print("Correction level:", level) - print("Reference Uncertainties (jagged):", juncs_jag_ref) - print("Uncertainties (jagged):", corrs) - assert ak.all(np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6) + resos = reso.getResolution(JetEta=test_eta, Rho=test_Rho, JetPt=test_pt) + resos_jag = reso.getResolution( + JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag + ) + resos_dak = reso.getResolution( + JetEta=test_eta_dak, Rho=test_Rho_dak, JetPt=test_pt_dak + ) + assert ak.all(np.abs(resos - ak.flatten(resos_jag)) < 1e-6) + assert ak.all(np.abs(resos - ak.flatten(resos_dak.compute())) < 1e-6) + print(resos_dak) + print(resos_dak.dask) + + test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + test_Rho_jag = test_Rho_jag[0:3] + test_Rho_jag = ak.concatenate( + [test_Rho_jag[:-1], [ak.concatenate([test_Rho_jag[-1, :-1], 100.0])]] + ) + counts = counts[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag) + print("eta:", test_eta_jag) + print("rho:", test_Rho_jag, "\n") + + resos_jag_ref = 
ak.unflatten( + np.array( + [ + 0.21974642, + 0.32421591, + 0.33702479, + 0.27420327, + 0.13940689, + 0.48134521, + 0.26564994, + 1.0, + ] + ), + counts, + ) + resos_jag = reso.getResolution( + JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag + ) + print("Reference Resolution (jagged):", resos_jag_ref) + print("Resolution (jagged):", resos_jag) + # NB: 5e-4 tolerance was agreed upon by lgray and aperloff, if the differences get bigger over time + # we need to agree upon how these numbers are evaluated (double/float conversion is kinda random) + assert ak.all(np.abs(ak.flatten(resos_jag_ref) - ak.flatten(resos_jag)) < 5e-4) -def test_jet_correction_uncertainty_sources(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_jet_correction_uncertainty(optimization_enabled): from coffea.jetmet_tools import JetCorrectionUncertainty - counts, test_eta, test_pt = dummy_jagged_eta_pt() - - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) - - test_pt_dak = dak.from_awkward(test_pt_jag, 1) - test_eta_dak = dak.from_awkward(test_eta_jag, 1) - - junc_names = [] - levels = [] - for name in dir(evaluator): - if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in name: - junc_names.append(name) - levels.append(name.split("_")[-1]) - # test for underscore in dataera - if "Fall17_17Nov2017_V6_MC_UncertaintySources_AK4PFchs_AbsoluteFlavMap" in name: - junc_names.append(name) - levels.append(name.split("_")[-1]) - junc = JetCorrectionUncertainty(**{name: evaluator[name] for name in junc_names}) + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() - print(junc) + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = ak.unflatten(test_eta, counts) - juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt) + test_pt_dak = dak.from_awkward(test_pt_jag, 1) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) - juncs_jag = 
list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - - juncs_dak = list(junc.getUncertainty(JetEta=test_eta_dak, JetPt=test_pt_dak)) + junc_names = ["Summer16_23Sep2016V3_MC_Uncertainty_AK4PFPuppi"] + junc = JetCorrectionUncertainty( + **{name: evaluator[name] for name in junc_names} + ) - for i, (level, corrs) in enumerate(juncs): - assert level in levels - assert corrs.shape[0] == test_eta.shape[0] - assert ak.all(corrs == ak.flatten(juncs_jag[i][1])) - assert ak.all(corrs == ak.flatten(juncs_dak[i][1].compute())) + print(junc) + + juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt) + + juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) + + juncs_dak = list(junc.getUncertainty(JetEta=test_eta_dak, JetPt=test_pt_dak)) + + for i, (level, corrs) in enumerate(juncs): + assert corrs.shape[0] == test_eta.shape[0] + assert ak.all(corrs == ak.flatten(juncs_jag[i][1])) + assert ak.all(corrs == ak.flatten(juncs_dak[i][1].compute())) + + zipped_dak = dak.zip({k: v for k, v in juncs_dak}) + print(zipped_dak) + print(zipped_dak.dask) + + test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + counts = counts[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag.tolist()) + print("eta:", test_eta_jag.tolist(), "\n") + + juncs_jag_ref = ak.unflatten( + np.array( + [ + [1.053504214, 0.946495786], + [1.033343349, 0.966656651], + [1.065159157, 0.934840843], + [1.033140127, 0.966859873], + [1.016858652, 0.983141348], + [1.130199999, 0.869800001], + [1.039968468, 0.960031532], + [1.033100002, 0.966899998], + ] + ), + counts, + ) + juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - zipped_dak = dak.zip({k: v for k, v in juncs_dak}) - print(zipped_dak) - print(zipped_dak.dask) + for i, (level, corrs) in enumerate(juncs_jag): + print("Index:", i) + print("Correction level:", level) + print("Reference Uncertainties (jagged):", juncs_jag_ref) + print("Uncertainties (jagged):", corrs) + assert 
ak.all(np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6) - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - counts = counts[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag.tolist()) - print("eta:", test_eta_jag.tolist(), "\n") - juncs_jag_ref = ak.unflatten( - np.array( - [ - [1.053504214, 0.946495786], - [1.033343349, 0.966656651], - [1.065159157, 0.934840843], - [1.033140127, 0.966859873], - [1.016858652, 0.983141348], - [1.130199999, 0.869800001], - [1.039968468, 0.960031532], - [1.033100002, 0.966899998], - ] - ), - counts, - ) - juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - for i, (level, corrs) in enumerate(juncs_jag): - if level != "Total": - continue - print("Index:", i) - print("Correction level:", level) - print("Reference Uncertainties (jagged):", juncs_jag_ref) - print("Uncertainties (jagged):", corrs, "\n") - assert ak.all(np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6) - - -def test_jet_correction_regrouped_uncertainty_sources(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_jet_correction_uncertainty_sources(optimization_enabled): from coffea.jetmet_tools import JetCorrectionUncertainty - counts, test_eta, test_pt = dummy_jagged_eta_pt() + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = ak.unflatten(test_eta, counts) - test_pt_dak = dak.from_awkward(test_pt_jag, 1) - test_eta_dak = dak.from_awkward(test_eta_jag, 1) + test_pt_dak = dak.from_awkward(test_pt_jag, 1) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) - junc_names = [] - levels = [] - for name in dir(evaluator): - if "Regrouped_Fall17_17Nov2017_V32_MC_UncertaintySources_AK4PFchs" in name: - junc_names.append(name) - if 
len(name.split("_")) == 9: - levels.append("_".join(name.split("_")[-2:])) - else: + junc_names = [] + levels = [] + for name in dir(evaluator): + if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in name: + junc_names.append(name) levels.append(name.split("_")[-1]) - junc = JetCorrectionUncertainty(**{name: evaluator[name] for name in junc_names}) - - print(junc) - - juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - - juncs_dak = list(junc.getUncertainty(JetEta=test_eta_dak, JetPt=test_pt_dak)) - - for i, tpl in enumerate(list(junc.getUncertainty(JetEta=test_eta, JetPt=test_pt))): - assert tpl[0] in levels - assert tpl[1].shape[0] == test_eta.shape[0] - assert ak.all(tpl[1] == ak.flatten(juncs_jag[i][1])) - assert ak.all(tpl[1] == ak.flatten(juncs_dak[i][1].compute())) + # test for underscore in dataera + if ( + "Fall17_17Nov2017_V6_MC_UncertaintySources_AK4PFchs_AbsoluteFlavMap" + in name + ): + junc_names.append(name) + levels.append(name.split("_")[-1]) + junc = JetCorrectionUncertainty( + **{name: evaluator[name] for name in junc_names} + ) - zipped_dak = dak.zip({k: v for k, v in juncs_dak}) - print(zipped_dak) - print(zipped_dak.dask) + print(junc) + + juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt) + + juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) + + juncs_dak = list(junc.getUncertainty(JetEta=test_eta_dak, JetPt=test_pt_dak)) + + for i, (level, corrs) in enumerate(juncs): + assert level in levels + assert corrs.shape[0] == test_eta.shape[0] + assert ak.all(corrs == ak.flatten(juncs_jag[i][1])) + assert ak.all(corrs == ak.flatten(juncs_dak[i][1].compute())) + + zipped_dak = dak.zip({k: v for k, v in juncs_dak}) + print(zipped_dak) + print(zipped_dak.dask) + + test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + counts = counts[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag.tolist()) + print("eta:", test_eta_jag.tolist(), "\n") + + juncs_jag_ref = 
ak.unflatten( + np.array( + [ + [1.053504214, 0.946495786], + [1.033343349, 0.966656651], + [1.065159157, 0.934840843], + [1.033140127, 0.966859873], + [1.016858652, 0.983141348], + [1.130199999, 0.869800001], + [1.039968468, 0.960031532], + [1.033100002, 0.966899998], + ] + ), + counts, + ) + juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) + for i, (level, corrs) in enumerate(juncs_jag): + if level != "Total": + continue + print("Index:", i) + print("Correction level:", level) + print("Reference Uncertainties (jagged):", juncs_jag_ref) + print("Uncertainties (jagged):", corrs, "\n") + assert ak.all(np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_jet_correction_regrouped_uncertainty_sources(optimization_enabled): + from coffea.jetmet_tools import JetCorrectionUncertainty - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - counts = counts[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag.tolist()) - print("eta:", test_eta_jag.tolist(), "\n") + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() + + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = ak.unflatten(test_eta, counts) + + test_pt_dak = dak.from_awkward(test_pt_jag, 1) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) + + junc_names = [] + levels = [] + for name in dir(evaluator): + if "Regrouped_Fall17_17Nov2017_V32_MC_UncertaintySources_AK4PFchs" in name: + junc_names.append(name) + if len(name.split("_")) == 9: + levels.append("_".join(name.split("_")[-2:])) + else: + levels.append(name.split("_")[-1]) + junc = JetCorrectionUncertainty( + **{name: evaluator[name] for name in junc_names} + ) - juncs_jag_ref = ak.unflatten( - np.array( - [ - [1.119159088, 0.880840912], - [1.027003404, 0.972996596], - [1.135201275, 0.864798725], - [1.039665259, 0.960334741], - 
[1.015064503, 0.984935497], - [1.149900004, 0.850099996], - [1.079960600, 0.920039400], - [1.041200001, 0.958799999], - ] - ), - counts, - ) - juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) - for i, (level, corrs) in enumerate(juncs_jag): - if level != "Total": - continue - print("Index:", i) - print("Correction level:", level) - print("Reference Uncertainties (jagged):", juncs_jag_ref) - print("Uncertainties (jagged):", corrs, "\n") - assert ak.all(np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6) - - -def test_jet_resolution_sf(): + print(junc) + + juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) + + juncs_dak = list(junc.getUncertainty(JetEta=test_eta_dak, JetPt=test_pt_dak)) + + for i, tpl in enumerate( + list(junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)) + ): + assert tpl[0] in levels + assert tpl[1].shape[0] == test_eta.shape[0] + assert ak.all(tpl[1] == ak.flatten(juncs_jag[i][1])) + assert ak.all(tpl[1] == ak.flatten(juncs_dak[i][1].compute())) + + zipped_dak = dak.zip({k: v for k, v in juncs_dak}) + print(zipped_dak) + print(zipped_dak.dask) + + test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + counts = counts[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag.tolist()) + print("eta:", test_eta_jag.tolist(), "\n") + + juncs_jag_ref = ak.unflatten( + np.array( + [ + [1.119159088, 0.880840912], + [1.027003404, 0.972996596], + [1.135201275, 0.864798725], + [1.039665259, 0.960334741], + [1.015064503, 0.984935497], + [1.149900004, 0.850099996], + [1.079960600, 0.920039400], + [1.041200001, 0.958799999], + ] + ), + counts, + ) + juncs_jag = list(junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag)) + for i, (level, corrs) in enumerate(juncs_jag): + if level != "Total": + continue + print("Index:", i) + print("Correction level:", level) + print("Reference Uncertainties (jagged):", juncs_jag_ref) + print("Uncertainties (jagged):", corrs, "\n") + assert 
ak.all(np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6) + + +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_jet_resolution_sf(optimization_enabled): from coffea.jetmet_tools import JetResolutionScaleFactor - counts, test_eta, test_pt = dummy_jagged_eta_pt() - - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() - test_eta_dak = dak.from_awkward(test_eta_jag, 1) + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = ak.unflatten(test_eta, counts) - jersf_names = ["Spring16_25nsV10_MC_SF_AK4PFPuppi"] - resosf = JetResolutionScaleFactor(**{name: evaluator[name] for name in jersf_names}) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) - print(resosf) - - # 0-jet compatibility - assert resosf.getScaleFactor(JetEta=test_eta[:0]).shape == (0, 3) - - resosfs = resosf.getScaleFactor(JetEta=test_eta) - resosfs_jag = resosf.getScaleFactor(JetEta=test_eta_jag) - resosfs_dak = resosf.getScaleFactor(JetEta=test_eta_dak) - assert ak.all(resosfs == ak.flatten(resosfs_jag)) - assert ak.all(resosfs == ak.flatten(resosfs_dak.compute())) - print(resosfs_dak) - print(resosfs_dak.dask) - - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - counts = counts[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag) - print("eta:", test_eta_jag, "\n") + jersf_names = ["Spring16_25nsV10_MC_SF_AK4PFPuppi"] + resosf = JetResolutionScaleFactor( + **{name: evaluator[name] for name in jersf_names} + ) - resosfs_jag_ref = ak.unflatten( - np.array( - [ - [1.857, 1.928, 1.786], - [1.084, 1.095, 1.073], - [1.364, 1.403, 1.325], - [1.177, 1.218, 1.136], - [1.138, 1.151, 1.125], - [1.364, 1.403, 1.325], - [1.177, 1.218, 1.136], - [1.082, 1.117, 1.047], - ] - ), - counts, - ) - resosfs_jag = resosf.getScaleFactor(JetEta=test_eta_jag) - print("Reference Resolution 
SF (jagged):", resosfs_jag_ref) - print("Resolution SF (jagged):", resosfs_jag) - assert ak.all(np.abs(ak.flatten(resosfs_jag_ref) - ak.flatten(resosfs_jag)) < 1e-6) + print(resosf) + + # 0-jet compatibility + assert resosf.getScaleFactor(JetEta=test_eta[:0]).shape == (0, 3) + + resosfs = resosf.getScaleFactor(JetEta=test_eta) + resosfs_jag = resosf.getScaleFactor(JetEta=test_eta_jag) + resosfs_dak = resosf.getScaleFactor(JetEta=test_eta_dak) + assert ak.all(resosfs == ak.flatten(resosfs_jag)) + assert ak.all(resosfs == ak.flatten(resosfs_dak.compute())) + print(resosfs_dak) + print(resosfs_dak.dask) + + test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + counts = counts[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag) + print("eta:", test_eta_jag, "\n") + + resosfs_jag_ref = ak.unflatten( + np.array( + [ + [1.857, 1.928, 1.786], + [1.084, 1.095, 1.073], + [1.364, 1.403, 1.325], + [1.177, 1.218, 1.136], + [1.138, 1.151, 1.125], + [1.364, 1.403, 1.325], + [1.177, 1.218, 1.136], + [1.082, 1.117, 1.047], + ] + ), + counts, + ) + resosfs_jag = resosf.getScaleFactor(JetEta=test_eta_jag) + print("Reference Resolution SF (jagged):", resosfs_jag_ref) + print("Resolution SF (jagged):", resosfs_jag) + assert ak.all( + np.abs(ak.flatten(resosfs_jag_ref) - ak.flatten(resosfs_jag)) < 1e-6 + ) -def test_jet_resolution_sf_2d(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_jet_resolution_sf_2d(optimization_enabled): from coffea.jetmet_tools import JetResolutionScaleFactor - counts, test_eta, test_pt = dummy_jagged_eta_pt() - - test_pt_jag = ak.unflatten(test_pt, counts) - test_eta_jag = ak.unflatten(test_eta, counts) + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + counts, test_eta, test_pt = dummy_jagged_eta_pt() - test_pt_dak = dak.from_awkward(test_pt_jag, 1) - test_eta_dak = dak.from_awkward(test_eta_jag, 1) + test_pt_jag = ak.unflatten(test_pt, counts) + test_eta_jag = 
ak.unflatten(test_eta, counts) - resosf = JetResolutionScaleFactor( - **{name: evaluator[name] for name in ["Autumn18_V7_MC_SF_AK4PFchs"]} - ) - - print(resosf) + test_pt_dak = dak.from_awkward(test_pt_jag, 1) + test_eta_dak = dak.from_awkward(test_eta_jag, 1) - # 0-jet compatibility - assert resosf.getScaleFactor(JetPt=test_pt[:0], JetEta=test_eta[:0]).shape == (0, 3) + resosf = JetResolutionScaleFactor( + **{name: evaluator[name] for name in ["Autumn18_V7_MC_SF_AK4PFchs"]} + ) - resosfs = resosf.getScaleFactor(JetPt=test_pt, JetEta=test_eta) - resosfs_jag = resosf.getScaleFactor(JetPt=test_pt_jag, JetEta=test_eta_jag) - resosfs_dak = resosf.getScaleFactor(JetPt=test_pt_dak, JetEta=test_eta_dak) - assert ak.all(resosfs == ak.flatten(resosfs_jag)) - assert ak.all(resosfs == ak.flatten(resosfs_dak.compute())) - print(resosfs_dak) - print(resosfs_dak.dask) + print(resosf) - test_pt_jag = test_pt_jag[0:3] - test_eta_jag = test_eta_jag[0:3] - counts = counts[0:3] - print("Raw jet values:") - print("pT:", test_pt_jag) - print("eta:", test_eta_jag, "\n") + # 0-jet compatibility + assert resosf.getScaleFactor(JetPt=test_pt[:0], JetEta=test_eta[:0]).shape == ( + 0, + 3, + ) - resosfs_jag_ref = ak.unflatten( - np.array( - [ - [1.11904, 1.31904, 1.0], - [1.1432, 1.2093, 1.0771], - [1.16633, 1.36633, 1.0], - [1.17642, 1.37642, 1.0], - [1.1808, 1.1977, 1.1640], - [1.15965, 1.35965, 1.0], - [1.17661, 1.37661, 1.0], - [1.1175, 1.1571, 1.0778], - ] - ), - counts, - ) - resosfs_jag = resosf.getScaleFactor(JetPt=test_pt_jag, JetEta=test_eta_jag) - print("Reference Resolution SF (jagged):", resosfs_jag_ref) - print("Resolution SF (jagged):", resosfs_jag) - assert ak.all(np.abs(ak.flatten(resosfs_jag_ref) - ak.flatten(resosfs_jag)) < 1e-6) + resosfs = resosf.getScaleFactor(JetPt=test_pt, JetEta=test_eta) + resosfs_jag = resosf.getScaleFactor(JetPt=test_pt_jag, JetEta=test_eta_jag) + resosfs_dak = resosf.getScaleFactor(JetPt=test_pt_dak, JetEta=test_eta_dak) + assert ak.all(resosfs == 
ak.flatten(resosfs_jag)) + assert ak.all(resosfs == ak.flatten(resosfs_dak.compute())) + print(resosfs_dak) + print(resosfs_dak.dask) + + test_pt_jag = test_pt_jag[0:3] + test_eta_jag = test_eta_jag[0:3] + counts = counts[0:3] + print("Raw jet values:") + print("pT:", test_pt_jag) + print("eta:", test_eta_jag, "\n") + + resosfs_jag_ref = ak.unflatten( + np.array( + [ + [1.11904, 1.31904, 1.0], + [1.1432, 1.2093, 1.0771], + [1.16633, 1.36633, 1.0], + [1.17642, 1.37642, 1.0], + [1.1808, 1.1977, 1.1640], + [1.15965, 1.35965, 1.0], + [1.17661, 1.37661, 1.0], + [1.1175, 1.1571, 1.0778], + ] + ), + counts, + ) + resosfs_jag = resosf.getScaleFactor(JetPt=test_pt_jag, JetEta=test_eta_jag) + print("Reference Resolution SF (jagged):", resosfs_jag_ref) + print("Resolution SF (jagged):", resosfs_jag) + assert ak.all( + np.abs(ak.flatten(resosfs_jag_ref) - ak.flatten(resosfs_jag)) < 1e-6 + ) -def test_corrected_jets_factory(): +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_corrected_jets_factory(optimization_enabled): import os - import dask - from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack events = None from coffea.nanoevents import NanoEventsFactory - events = NanoEventsFactory.from_root( - {os.path.abspath("tests/samples/nano_dy.root"): "Events"}, - metadata={}, - permit_dask=True, - ).events() - - jec_stack_names = [ - "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi", - "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi", - "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi", - "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi", - "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi", - "Spring16_25nsV10_MC_SF_AK4PFPuppi", - ] - for key in evaluator.keys(): - if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key: - jec_stack_names.append(key) - - jec_inputs = {name: evaluator[name] for name in jec_stack_names} - jec_stack = JECStack(jec_inputs) - - name_map = jec_stack.blank_name_map - name_map["JetPt"] = "pt" - 
name_map["JetMass"] = "mass" - name_map["JetEta"] = "eta" - name_map["JetA"] = "area" - - jets = events.Jet - - jets["pt_raw"] = (1 - jets["rawFactor"]) * jets.pt - jets["mass_raw"] = (1 - jets["rawFactor"]) * jets.mass - jets["pt_gen"] = dak.fill_none(jets.matched_gen.pt, 0) - jets["rho"] = events.fixedGridRhoFastjetAll - name_map["ptGenJet"] = "pt_gen" - name_map["ptRaw"] = "pt_raw" - name_map["massRaw"] = "mass_raw" - name_map["Rho"] = "rho" - - print(name_map) - - tic = time.time() - jet_factory = CorrectedJetsFactory(name_map, jec_stack) - toc = time.time() - - print("setup corrected jets time =", toc - tic) - - tic = time.time() - prof = pyinstrument.Profiler() - prof.start() - corrected_jets = jet_factory.build(jets) - prof.stop() - toc = time.time() - - print("corrected_jets build time =", toc - tic) - - print(prof.output_text(unicode=True, color=True, show_all=True)) - - print(corrected_jets.dask) - - print("Generated jet pt:", corrected_jets.pt_gen.compute()) - print("Original jet pt:", corrected_jets.pt_orig.compute()) - print("Raw jet pt:", jets.pt_raw) - print("Corrected jet pt:", corrected_jets.pt.compute()) - print("Original jet mass:", corrected_jets.mass_orig.compute()) - print("Raw jet mass:", jets["mass_raw"]) - print("Corrected jet mass:", corrected_jets.mass.compute()) - print("jet eta:", jets.eta) - - tic = time.time() - prof = pyinstrument.Profiler() - prof.start() - - tocompute = { - unc: {"up": corrected_jets[unc].up.pt, "down": corrected_jets[unc].down.pt} - for unc in jet_factory.uncertainties() - } - computed_uncs = dask.compute(tocompute)[0] - - for unc in jet_factory.uncertainties(): - print(unc) - print(computed_uncs[unc]["up"]) - print(computed_uncs[unc]["down"]) - prof.stop() - toc = time.time() - - print(prof.output_text(unicode=True, color=True, show_all=True)) - - print("build all jet variations =", toc - tic) - - # Test that the corrections were applied correctly - from coffea.jetmet_tools import ( - FactorizedJetCorrector, - 
JetResolution, - JetResolutionScaleFactor, - ) + with dask.config.set({"awkward.optimization.enabled": True}): + events = NanoEventsFactory.from_root( + {os.path.abspath("tests/samples/nano_dy.root"): "Events"}, + metadata={}, + permit_dask=True, + ).events() + + jec_stack_names = [ + "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi", + "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi", + "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi", + "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi", + "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi", + "Spring16_25nsV10_MC_SF_AK4PFPuppi", + ] + for key in evaluator.keys(): + if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key: + jec_stack_names.append(key) + + jec_inputs = {name: evaluator[name] for name in jec_stack_names} + jec_stack = JECStack(jec_inputs) + + name_map = jec_stack.blank_name_map + name_map["JetPt"] = "pt" + name_map["JetMass"] = "mass" + name_map["JetEta"] = "eta" + name_map["JetA"] = "area" + + jets = events.Jet + + jets["pt_raw"] = (1 - jets["rawFactor"]) * jets.pt + jets["mass_raw"] = (1 - jets["rawFactor"]) * jets.mass + jets["pt_gen"] = dak.fill_none(jets.matched_gen.pt, 0) + jets["rho"] = events.fixedGridRhoFastjetAll + name_map["ptGenJet"] = "pt_gen" + name_map["ptRaw"] = "pt_raw" + name_map["massRaw"] = "mass_raw" + name_map["Rho"] = "rho" + + print(name_map) + + tic = time.time() + jet_factory = CorrectedJetsFactory(name_map, jec_stack) + toc = time.time() + + print("setup corrected jets time =", toc - tic) + + tic = time.time() + prof = pyinstrument.Profiler() + prof.start() + corrected_jets = jet_factory.build(jets) + prof.stop() + toc = time.time() + + print("corrected_jets build time =", toc - tic) + + print(prof.output_text(unicode=True, color=True, show_all=True)) + + print(corrected_jets.dask) + + print("Generated jet pt:", corrected_jets.pt_gen.compute()) + print("Original jet pt:", corrected_jets.pt_orig.compute()) + print("Raw jet pt:", jets.pt_raw) + print("Corrected jet pt:", 
corrected_jets.pt.compute()) + print("Original jet mass:", corrected_jets.mass_orig.compute()) + print("Raw jet mass:", jets["mass_raw"]) + print("Corrected jet mass:", corrected_jets.mass.compute()) + print("jet eta:", jets.eta) + + tic = time.time() + prof = pyinstrument.Profiler() + prof.start() + + tocompute = { + unc: {"up": corrected_jets[unc].up.pt, "down": corrected_jets[unc].down.pt} + for unc in jet_factory.uncertainties() + } + computed_uncs = dask.compute(tocompute)[0] + + for unc in jet_factory.uncertainties(): + print(unc) + print(computed_uncs[unc]["up"]) + print(computed_uncs[unc]["down"]) + prof.stop() + toc = time.time() + + print(prof.output_text(unicode=True, color=True, show_all=True)) + + print("build all jet variations =", toc - tic) + + # Test that the corrections were applied correctly + from coffea.jetmet_tools import ( + FactorizedJetCorrector, + JetResolution, + JetResolutionScaleFactor, + ) - corrector = FactorizedJetCorrector( - **{name: evaluator[name] for name in jec_stack_names[0:4]} - ) + corrector = FactorizedJetCorrector( + **{name: evaluator[name] for name in jec_stack_names[0:4]} + ) - check_corrs = corrector.getCorrection( - JetEta=jets.eta, - Rho=jets.rho, - JetPt=jets.pt_raw, - JetA=jets.area, - ).compute() - reso = JetResolution(**{name: evaluator[name] for name in jec_stack_names[4:5]}) - check_resos = reso.getResolution( - JetEta=jets.eta, - Rho=jets.rho, - JetPt=jets.pt_raw, - ).compute() - resosf = JetResolutionScaleFactor( - **{name: evaluator[name] for name in jec_stack_names[5:6]} - ) + check_corrs = corrector.getCorrection( + JetEta=jets.eta, + Rho=jets.rho, + JetPt=jets.pt_raw, + JetA=jets.area, + ).compute() + reso = JetResolution(**{name: evaluator[name] for name in jec_stack_names[4:5]}) + check_resos = reso.getResolution( + JetEta=jets.eta, + Rho=jets.rho, + JetPt=jets.pt_raw, + ).compute() + resosf = JetResolutionScaleFactor( + **{name: evaluator[name] for name in jec_stack_names[5:6]} + ) - 
print(dak.necessary_columns(jets.eta)) - print( - dak.necessary_columns( - resosf.getScaleFactor( - JetEta=jets.eta, + print(dak.necessary_columns(jets.eta)) + print( + dak.necessary_columns( + resosf.getScaleFactor( + JetEta=jets.eta, + ) ) ) - ) - check_resosfs = resosf.getScaleFactor( - JetEta=events.Jet.eta, - ).compute() + check_resosfs = resosf.getScaleFactor( + JetEta=events.Jet.eta, + ).compute() - # Filter out the non-deterministic (no gen pt) jets - def smear_factor(jetPt, pt_gen, jersf): - return ( - ak.full_like(jetPt, 1.0) - + (jersf[:, 0] - ak.full_like(jetPt, 1.0)) * (jetPt - pt_gen) / jetPt - ) + # Filter out the non-deterministic (no gen pt) jets + def smear_factor(jetPt, pt_gen, jersf): + return ( + ak.full_like(jetPt, 1.0) + + (jersf[:, 0] - ak.full_like(jetPt, 1.0)) * (jetPt - pt_gen) / jetPt + ) - test_gen_pt = ak.concatenate( - [ - dak.fill_none(events.Jet.matched_gen.pt, 0).compute()[0, :-2], - dak.fill_none(events.Jet.matched_gen.pt, 0).compute()[-1, :-1], - ] - ) - test_raw_pt = ak.concatenate( - [ - ((1 - events.Jet.rawFactor) * events.Jet.pt).compute()[0, :-2], - ((1 - events.Jet.rawFactor) * events.Jet.pt).compute()[-1, :-1], - ] - ) - test_pt = ak.concatenate( - [corrected_jets.pt.compute()[0, :-2], corrected_jets.pt.compute()[-1, :-1]] - ) - test_eta = ak.concatenate( - [events.Jet.eta.compute()[0, :-2], events.Jet.eta.compute()[-1, :-1]] - ) - test_jer = ak.concatenate([check_resos[0, :-2], check_resos[-1, :-1]]) - test_jer_sf = ak.concatenate( - [ - check_resosfs[0, :-2], - check_resosfs[-1, :-1], - ] - ) - test_jec = ak.concatenate([check_corrs[0, :-2], check_corrs[-1, :-1]]) - test_corrected_pt = ak.concatenate( - [corrected_jets.pt.compute()[0, :-2], corrected_jets.pt.compute()[-1, :-1]] - ) - test_corr_pt = test_raw_pt * test_jec - test_pt_smear_corr = test_corr_pt * smear_factor( - test_corr_pt, test_gen_pt, test_jer_sf - ) + test_gen_pt = ak.concatenate( + [ + dak.fill_none(events.Jet.matched_gen.pt, 0).compute()[0, :-2], + 
dak.fill_none(events.Jet.matched_gen.pt, 0).compute()[-1, :-1], + ] + ) + test_raw_pt = ak.concatenate( + [ + ((1 - events.Jet.rawFactor) * events.Jet.pt).compute()[0, :-2], + ((1 - events.Jet.rawFactor) * events.Jet.pt).compute()[-1, :-1], + ] + ) + test_pt = ak.concatenate( + [corrected_jets.pt.compute()[0, :-2], corrected_jets.pt.compute()[-1, :-1]] + ) + test_eta = ak.concatenate( + [events.Jet.eta.compute()[0, :-2], events.Jet.eta.compute()[-1, :-1]] + ) + test_jer = ak.concatenate([check_resos[0, :-2], check_resos[-1, :-1]]) + test_jer_sf = ak.concatenate( + [ + check_resosfs[0, :-2], + check_resosfs[-1, :-1], + ] + ) + test_jec = ak.concatenate([check_corrs[0, :-2], check_corrs[-1, :-1]]) + test_corrected_pt = ak.concatenate( + [corrected_jets.pt.compute()[0, :-2], corrected_jets.pt.compute()[-1, :-1]] + ) + test_corr_pt = test_raw_pt * test_jec + test_pt_smear_corr = test_corr_pt * smear_factor( + test_corr_pt, test_gen_pt, test_jer_sf + ) - # Print the results of the "by-hand" calculations and confirm that the values match the expected values - print("\nConfirm the CorrectedJetsFactory values:") - print("Jet pt (gen)", test_gen_pt.tolist()) - print("Jet pt (raw)", test_raw_pt.tolist()) - print("Jet pt (nano):", test_pt.tolist()) - print("Jet eta:", test_eta.tolist()) - print("Jet energy resolution:", test_jer.tolist()) - print("Jet energy resolution sf:", test_jer_sf.tolist()) - print("Jet energy correction:", test_jec.tolist()) - print("Corrected jet pt (ref)", test_corr_pt.tolist()) - print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist()) - print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n") - assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6) - - name_map["METpt"] = "pt" - name_map["METphi"] = "phi" - name_map["JetPhi"] = "phi" - name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX" - name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY" - - tic = time.time() - met_factory = 
CorrectedMETFactory(name_map) - toc = time.time() - - print("setup corrected MET time =", toc - tic) - - met = events.MET - tic = time.time() - # prof = pyinstrument.Profiler() - # prof.start() - corrected_met = met_factory.build(met, corrected_jets) - # prof.stop() - toc = time.time() - - # print(prof.output_text(unicode=True, color=True, show_all=True)) - - print("corrected_met build time =", toc - tic) - - print(corrected_met.dask) - - print(corrected_met.pt_orig.compute()) - print(corrected_met.pt.compute()) - tic = time.time() - prof = pyinstrument.Profiler() - prof.start() - - tocompute = { - unc: {"up": corrected_met[unc].up.pt, "down": corrected_met[unc].down.pt} - for unc in (jet_factory.uncertainties() + met_factory.uncertainties()) - } - computed_uncs = dask.compute(tocompute)[0] - - for unc in jet_factory.uncertainties() + met_factory.uncertainties(): - print(unc) - print(computed_uncs[unc]["up"]) - print(computed_uncs[unc]["down"]) - prof.stop() - toc = time.time() - - print("build all met variations =", toc - tic) - - print(prof.output_text(unicode=True, color=True, show_all=True)) + # Print the results of the "by-hand" calculations and confirm that the values match the expected values + print("\nConfirm the CorrectedJetsFactory values:") + print("Jet pt (gen)", test_gen_pt.tolist()) + print("Jet pt (raw)", test_raw_pt.tolist()) + print("Jet pt (nano):", test_pt.tolist()) + print("Jet eta:", test_eta.tolist()) + print("Jet energy resolution:", test_jer.tolist()) + print("Jet energy resolution sf:", test_jer_sf.tolist()) + print("Jet energy correction:", test_jec.tolist()) + print("Corrected jet pt (ref)", test_corr_pt.tolist()) + print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist()) + print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n") + assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6) + + name_map["METpt"] = "pt" + name_map["METphi"] = "phi" + name_map["JetPhi"] = "phi" + 
name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX" + name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY" + + tic = time.time() + met_factory = CorrectedMETFactory(name_map) + toc = time.time() + + print("setup corrected MET time =", toc - tic) + + met = events.MET + tic = time.time() + # prof = pyinstrument.Profiler() + # prof.start() + corrected_met = met_factory.build(met, corrected_jets) + # prof.stop() + toc = time.time() + + # print(prof.output_text(unicode=True, color=True, show_all=True)) + + print("corrected_met build time =", toc - tic) + + print(corrected_met.dask) + + print(corrected_met.pt_orig.compute()) + print(corrected_met.pt.compute()) + tic = time.time() + prof = pyinstrument.Profiler() + prof.start() + + tocompute = { + unc: {"up": corrected_met[unc].up.pt, "down": corrected_met[unc].down.pt} + for unc in (jet_factory.uncertainties() + met_factory.uncertainties()) + } + computed_uncs = dask.compute(tocompute)[0] + + for unc in jet_factory.uncertainties() + met_factory.uncertainties(): + print(unc) + print(computed_uncs[unc]["up"]) + print(computed_uncs[unc]["down"]) + prof.stop() + toc = time.time() + + print("build all met variations =", toc - tic) + + print(prof.output_text(unicode=True, color=True, show_all=True)) diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index 85d7d5fac..f4d5a3b30 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -1,6 +1,7 @@ import os import awkward as ak +import dask import dask_awkward as dak import pytest from dummy_distributions import dummy_jagged_eta_pt @@ -142,97 +143,103 @@ def test_evaluate_noimpl(): pass -def test_correctionlib(): - extractor = lookup_tools.extractor() - extractor.add_weight_sets(["* * tests/samples/testSF2d.corr.json.gz"]) +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_correctionlib(optimization_enabled): + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + extractor = 
lookup_tools.extractor() + extractor.add_weight_sets(["* * tests/samples/testSF2d.corr.json.gz"]) - extractor.finalize() + extractor.finalize() - evaluator = extractor.make_evaluator() + evaluator = extractor.make_evaluator() - counts, test_eta, test_pt = dummy_jagged_eta_pt() + counts, test_eta, test_pt = dummy_jagged_eta_pt() - test_out = evaluator["scalefactors_Tight_Electron"](test_eta, test_pt) + test_out = evaluator["scalefactors_Tight_Electron"](test_eta, test_pt) - # print it - print(evaluator["scalefactors_Tight_Electron"]) + # print it + print(evaluator["scalefactors_Tight_Electron"]) - # test structured eval - test_eta_jagged = ak.unflatten(test_eta, counts) - test_pt_jagged = ak.unflatten(test_pt, counts) - test_out_jagged = evaluator["scalefactors_Tight_Electron"]( - test_eta_jagged, test_pt_jagged - ) + # test structured eval + test_eta_jagged = ak.unflatten(test_eta, counts) + test_pt_jagged = ak.unflatten(test_pt, counts) + test_out_jagged = evaluator["scalefactors_Tight_Electron"]( + test_eta_jagged, test_pt_jagged + ) - # test lazy eval - test_eta_dak = dak.from_awkward(test_eta_jagged, 1) - test_pt_dak = dak.from_awkward(test_pt_jagged, 1) - test_out_dak = evaluator["scalefactors_Tight_Electron"]( - test_eta_dak, test_pt_dak, dask_label="scalefactors_Tight_Electron" - ) + # test lazy eval + test_eta_dak = dak.from_awkward(test_eta_jagged, 1) + test_pt_dak = dak.from_awkward(test_pt_jagged, 1) + test_out_dak = evaluator["scalefactors_Tight_Electron"]( + test_eta_dak, test_pt_dak, dask_label="scalefactors_Tight_Electron" + ) - print(test_out_dak) + print(test_out_dak) - assert ak.all(ak.num(test_out_jagged) == counts) - assert ak.all(ak.flatten(test_out_jagged) == test_out) - assert ak.all(ak.flatten(test_out_dak.compute()) == test_out) + assert ak.all(ak.num(test_out_jagged) == counts) + assert ak.all(ak.flatten(test_out_jagged) == test_out) + assert ak.all(ak.flatten(test_out_dak.compute()) == test_out) - print(test_out) + print(test_out) - diff 
= np.abs(test_out - _testSF2d_expected_output) - print("Max diff: %.16f" % diff.max()) - print("Median diff: %.16f" % np.median(diff)) - print( - "Diff over threshold rate: %.1f %%" % (100 * (diff >= 1.0e-8).sum() / diff.size) - ) - assert (diff < 1.0e-8).all() + diff = np.abs(test_out - _testSF2d_expected_output) + print("Max diff: %.16f" % diff.max()) + print("Median diff: %.16f" % np.median(diff)) + print( + "Diff over threshold rate: %.1f %%" + % (100 * (diff >= 1.0e-8).sum() / diff.size) + ) + assert (diff < 1.0e-8).all() -def test_root_scalefactors(): - extractor = lookup_tools.extractor() - extractor.add_weight_sets( - ["testSF2d scalefactors_Tight_Electron tests/samples/testSF2d.histo.root"] - ) +@pytest.mark.parametrize("optimization_enabled", [True, False]) +def test_root_scalefactors(optimization_enabled): + with dask.config.set({"awkward.optimization.enabled": optimization_enabled}): + extractor = lookup_tools.extractor() + extractor.add_weight_sets( + ["testSF2d scalefactors_Tight_Electron tests/samples/testSF2d.histo.root"] + ) - extractor.finalize(reduce_list=["testSF2d"]) + extractor.finalize(reduce_list=["testSF2d"]) - evaluator = extractor.make_evaluator() + evaluator = extractor.make_evaluator() - counts, test_eta, test_pt = dummy_jagged_eta_pt() + counts, test_eta, test_pt = dummy_jagged_eta_pt() - # test flat eval - test_out = evaluator["testSF2d"](test_eta, test_pt) + # test flat eval + test_out = evaluator["testSF2d"](test_eta, test_pt) - # print it - print(evaluator["testSF2d"]) + # print it + print(evaluator["testSF2d"]) - # test structured eval - test_eta_jagged = ak.unflatten(test_eta, counts) - test_pt_jagged = ak.unflatten(test_pt, counts) - test_out_jagged = evaluator["testSF2d"](test_eta_jagged, test_pt_jagged) + # test structured eval + test_eta_jagged = ak.unflatten(test_eta, counts) + test_pt_jagged = ak.unflatten(test_pt, counts) + test_out_jagged = evaluator["testSF2d"](test_eta_jagged, test_pt_jagged) - # test lazy eval - 
test_eta_dak = dak.from_awkward(test_eta_jagged, 1) - test_pt_dak = dak.from_awkward(test_pt_jagged, 1) - test_out_dak = evaluator["testSF2d"]( - test_eta_dak, test_pt_dak, dask_label="testSF2d" - ) + # test lazy eval + test_eta_dak = dak.from_awkward(test_eta_jagged, 1) + test_pt_dak = dak.from_awkward(test_pt_jagged, 1) + test_out_dak = evaluator["testSF2d"]( + test_eta_dak, test_pt_dak, dask_label="testSF2d" + ) - print(test_out_dak) + print(test_out_dak) - assert ak.all(ak.num(test_out_jagged) == counts) - assert ak.all(ak.flatten(test_out_jagged) == test_out) - assert ak.all(ak.flatten(test_out_dak.compute()) == test_out) + assert ak.all(ak.num(test_out_jagged) == counts) + assert ak.all(ak.flatten(test_out_jagged) == test_out) + assert ak.all(ak.flatten(test_out_dak.compute()) == test_out) - print(test_out) + print(test_out) - diff = np.abs(test_out - _testSF2d_expected_output) - print("Max diff: %.16f" % diff.max()) - print("Median diff: %.16f" % np.median(diff)) - print( - "Diff over threshold rate: %.1f %%" % (100 * (diff >= 1.0e-8).sum() / diff.size) - ) - assert (diff < 1.0e-8).all() + diff = np.abs(test_out - _testSF2d_expected_output) + print("Max diff: %.16f" % diff.max()) + print("Median diff: %.16f" % np.median(diff)) + print( + "Diff over threshold rate: %.1f %%" + % (100 * (diff >= 1.0e-8).sum() / diff.size) + ) + assert (diff < 1.0e-8).all() def test_histo_json_scalefactors():