diff --git a/lifelib/libraries/savings/CashValue_ME_EX4/Projection/__init__.py b/lifelib/libraries/savings/CashValue_ME_EX4/Projection/__init__.py index 599a5f3..cff24ba 100644 --- a/lifelib/libraries/savings/CashValue_ME_EX4/Projection/__init__.py +++ b/lifelib/libraries/savings/CashValue_ME_EX4/Projection/__init__.py @@ -956,9 +956,9 @@ def inv_return_table(): sigma = 0.03 dt = 1/12 - return np.exp( - (mu - 0.5 * sigma**2) * dt + sigma * dt**0.5 * std_norm_rand() - ) - 1 + return np.tile(np.exp( + (mu - 0.5 * sigma**2) * dt + sigma * dt**0.5 * std_norm_rand()) - 1, + (point_size(), 1)) def is_wl(): @@ -1920,6 +1920,8 @@ def surr_charge_table_stacked(): return surr_charge_table.stack().reorder_levels([1, 0]).sort_index() +point_size = lambda: len(model_point_table_ext()) + # --------------------------------------------------------------------------- # References @@ -1939,10 +1941,10 @@ def surr_charge_table_stacked(): stats = ("Module", "scipy.stats") -scen_size = 10000 +scen_size = 1000 model_point_1 = ("DataClient", 1916615684048) model_point_moneyness = ("DataClient", 1916616522432) -model_point_table = ("DataClient", 1916615684048) \ No newline at end of file +model_point_table = ("DataClient", 1916616522432) \ No newline at end of file diff --git a/lifelib/libraries/savings/CashValue_ME_EX4/model_point_moneyness.xlsx b/lifelib/libraries/savings/CashValue_ME_EX4/model_point_moneyness.xlsx index 41bea04..a53dac4 100644 Binary files a/lifelib/libraries/savings/CashValue_ME_EX4/model_point_moneyness.xlsx and b/lifelib/libraries/savings/CashValue_ME_EX4/model_point_moneyness.xlsx differ diff --git a/lifelib/libraries/savings/CashValue_ME_EX4/product_spec_table.xlsx b/lifelib/libraries/savings/CashValue_ME_EX4/product_spec_table.xlsx index 135ff36..ad13ed1 100644 Binary files a/lifelib/libraries/savings/CashValue_ME_EX4/product_spec_table.xlsx and b/lifelib/libraries/savings/CashValue_ME_EX4/product_spec_table.xlsx differ diff --git 
a/lifelib/libraries/savings/savings_example4.ipynb b/lifelib/libraries/savings/savings_example4.ipynb index f966ddd..5450bf2 100644 --- a/lifelib/libraries/savings/savings_example4.ipynb +++ b/lifelib/libraries/savings/savings_example4.ipynb @@ -5,9 +5,30 @@ "id": "9400df98", "metadata": {}, "source": [ - "# 4. Optimizing and profiling the model\n", + "# 4. Profiling and Optimizing the Model\n", "\n", - "TODO: Add description" + "This example shows how to profile the `CashValue_ME_EX1` model and how to optimize it. The optimized model is named `CashValue_ME_EX4` and is included in the `savings` library. `CashValue_ME_EX4` is about 4 times faster than `CashValue_ME_EX1`, although the result may differ between hardware environments.\n", + "The profiling and model optimization approach shown here is applicable to any model in lifelib that uses modelx and depends heavily on pandas.\n", + "\n", + "The profiling takes the following steps:\n", + "* Measure the run time of `CashValue_ME_EX1` using the `timeit` standard library. \n", + "* Profile the model using the `start_stacktrace`, `get_stacktrace`, `stop_stacktrace` functions of modelx.\n", + "* Output the profiling result and see what cells are taking time.\n", + "\n", + "In this example, the time-consuming cells are those that use pandas heavily, so the optimization focuses on replacing pandas DataFrames and Series with numpy arrays and avoiding time-consuming pandas operations.\n", + "Keep in mind that replacing pandas objects with numpy arrays reduces the readability of the data that the model holds. \n", + "Pandas objects can have more representative indexes compared to integer-indexed numpy arrays.\n", + "Pandas objects also allow more sophisticated and complex operations on tabular data, so whether to use numpy for speed or pandas for ease of use is a trade-off."
+ ] + }, + { + "cell_type": "markdown", + "id": "2330e5ee", + "metadata": {}, + "source": [ + "## Measuring the runtime of the model\n", + "\n", + "The code below loads `CashValue_ME_EX1`, assigns it to `ex1`." ] }, { @@ -32,16 +53,214 @@ "ex1 = mx.read_model('CashValue_ME_EX1') " ] }, + { + "cell_type": "markdown", + "id": "987ed9ef", + "metadata": {}, + "source": [ + "By default, in `CashValue_ME_EX1`, 1 model point on 10,000 scenarios are set, which would require the same calculation load as running 10,000 model points on 1 scenario.\n", + "\n", + "The product spec id (`spec_id`) of the default single model point is `A`, which does not have surrender charge. Later in this example, we want to observe how surrender charge rates are looked up based on product specs, so here we change the model point table so that it refers to a table of multiple model points of multiple product specs.\n" + ] + }, { "cell_type": "code", "execution_count": 3, - "id": "55aaff8b", + "id": "3065fd30", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
spec_idage_at_entrysexpolicy_termpolicy_countsum_assuredduration_mthpremium_ppav_pp_initaccum_prem_init_pp
point_id
1A20M10100500000050000000
2B20M10100500000047500000
3C20M10100500000045000000
4D20M10100500000042500000
5A20M10100500000040000000
6B20M10100500000037500000
7C20M10100500000035000000
8D20M10100500000032500000
9A20M10100500000030000000
\n", + "
" + ], "text/plain": [ - "1.2499775000000004" + " spec_id age_at_entry sex policy_term policy_count sum_assured \\\n", + "point_id \n", + "1 A 20 M 10 100 500000 \n", + "2 B 20 M 10 100 500000 \n", + "3 C 20 M 10 100 500000 \n", + "4 D 20 M 10 100 500000 \n", + "5 A 20 M 10 100 500000 \n", + "6 B 20 M 10 100 500000 \n", + "7 C 20 M 10 100 500000 \n", + "8 D 20 M 10 100 500000 \n", + "9 A 20 M 10 100 500000 \n", + "\n", + " duration_mth premium_pp av_pp_init accum_prem_init_pp \n", + "point_id \n", + "1 0 500000 0 0 \n", + "2 0 475000 0 0 \n", + "3 0 450000 0 0 \n", + "4 0 425000 0 0 \n", + "5 0 400000 0 0 \n", + "6 0 375000 0 0 \n", + "7 0 350000 0 0 \n", + "8 0 325000 0 0 \n", + "9 0 300000 0 0 " ] }, "execution_count": 3, @@ -50,109 +269,157 @@ } ], "source": [ - "timeit.timeit('ex1.Projection.result_pv()', globals=globals(), number=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cc320585", - "metadata": {}, - "outputs": [], - "source": [ - "ex1.clear_all()" + "ex1.Projection.model_point_table = ex1.Projection.model_point_moneyness # Set multiple model points\n", + "ex1.Projection.model_point_table['spec_id'] = ['A', 'B', 'C', 'D', 'A', 'B', 'C', 'D', 'A'] # Set various spec IDs\n", + "ex1.Projection.model_point_moneyness" ] }, { - "cell_type": "code", - "execution_count": 5, - "id": "759cb684", + "cell_type": "markdown", + "id": "d6d06de8", "metadata": {}, - "outputs": [], "source": [ - "ex4 = mx.read_model('CashValue_ME_EX4') " + "The product specs by `spec_id` are defined in `product_spec_table`. The `is_wl` column indicates whether each type is whole life or not. \n", + "To save run time and memory, let's set `is_wl` to `False` for all the specs." ] }, { "cell_type": "code", - "execution_count": 6, - "id": "e9caf8ae", + "execution_count": 4, + "id": "d5eb4181", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
premium_typehas_surr_chargesurr_charge_idload_prem_rateis_wl
spec_id
ASINGLEFalseNaN0.00False
BSINGLETruetype_10.00False
CLEVELFalseNaN0.10False
DLEVELTruetype_30.05False
\n", + "
" + ], "text/plain": [ - "0.2862044000000008" + " premium_type has_surr_charge surr_charge_id load_prem_rate is_wl\n", + "spec_id \n", + "A SINGLE False NaN 0.00 False\n", + "B SINGLE True type_1 0.00 False\n", + "C LEVEL False NaN 0.10 False\n", + "D LEVEL True type_3 0.05 False" ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "timeit.timeit('ex4.Projection.result_pv()', globals=globals(), number=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "eaa6268e", - "metadata": {}, - "outputs": [], - "source": [ - "ex4.clear_all()" + "ex1.Projection.product_spec_table['is_wl'] = False\n", + "ex1.Projection.product_spec_table" ] }, { "cell_type": "markdown", - "id": "952262b9", + "id": "ed399067", "metadata": {}, "source": [ - "TODO: Add description" + "For the same reason, we reduce the number of scenarios from 10,000 to 1000." ] }, { - "cell_type": "markdown", - "id": "e7a25c30", + "cell_type": "code", + "execution_count": 5, + "id": "b40dd116", "metadata": {}, + "outputs": [], "source": [ - "## Testing with 100,000 scenarios\n", - "\n", - "TODO: Add description\n", - "\n", - "
\n", - "\n", - "**Warning:**\n", - "\n", - "takes about 4GB of memory space \n", - " \n", - "
" + "ex1.Projection.scen_size = 1000" ] }, { - "cell_type": "code", - "execution_count": 8, - "id": "42ef0104", + "cell_type": "markdown", + "id": "3fddf518", "metadata": {}, - "outputs": [], "source": [ - "ex1.Projection.scen_size = 100_000" + "Now let's see how much time the model takes for a run. The code below calculates `result_pv()` by measuring the run time by `timeit`. `number=1` indicates the run is performed only once." ] }, { "cell_type": "code", - "execution_count": 9, - "id": "66cc75a8", + "execution_count": 6, + "id": "55aaff8b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "8.0889741" + "0.915054" ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -162,69 +429,61 @@ ] }, { - "cell_type": "code", - "execution_count": 10, - "id": "be375753", - "metadata": {}, - "outputs": [], - "source": [ - "ex1.clear_all()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a911b29b", + "cell_type": "markdown", + "id": "c2f7976b", "metadata": {}, - "outputs": [], "source": [ - "ex4.Projection.scen_size = 100_000" + "Let's output the mean of the present value of net cashflows of `ex1`, as we want to check it against the result of the optimized model." 
] }, { "cell_type": "code", - "execution_count": 12, - "id": "0026cc1c", + "execution_count": 7, + "id": "2f0e249f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2.3929682999999997" + "44386401.300826035" ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "timeit.timeit('ex4.Projection.result_pv()', globals=globals(), number=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "caee567e", - "metadata": {}, - "outputs": [], - "source": [ - "ex4.clear_all()" + "ex1.Projection.result_pv()['Net Cashflow'].mean()" ] }, { "cell_type": "markdown", - "id": "2ba009d9", + "id": "30a965a7", "metadata": {}, "source": [ - "## Profile the runs\n", + "## Profiling the run\n", + "\n", + "To profile `ex1`, we use modelx's feature to trace a run. modelx offers 3 functions, `start_stacktrace`, `get_stacktrace` and `stop_stacktrace`, to start, output and stop tracing the call stack during a run. \n", + "The code block below is an idiomatic expression for using the functions:\n", "\n", - "TODO: Add description" + "```python\n", + "try:\n", + " mx.start_stacktrace(maxlen=None)\n", + " ex1.Projection.result_pv()\n", + " df = pd.DataFrame.from_dict(\n", + " mx.get_stacktrace(summarize=True), orient=\"index\")\n", + "finally:\n", + " mx.stop_stacktrace()\n", + " ex1.clear_all()\n", + "```\n", + "\n", + "In this example, we want more concise output on what cells are taking time and how much, so we define our custom function that profiles and reports a run using the code block above." 
] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 8, "id": "f36620bb", "metadata": {}, "outputs": [], @@ -247,17 +506,24 @@ " # Add duration %\n", " total = df['duration'].sum()\n", " df['dur_perc'] = df['duration'] * 100 / total\n", + " df = df[['calls', 'duration', 'dur_perc']]\n", " \n", " return df.sort_values(['dur_perc'], ascending=False)" ] }, + { + "cell_type": "markdown", + "id": "33358fef", + "metadata": {}, + "source": [ + "The code below performs a profiling run and outputs the 10 most time-consuming cells." + ] + }, { "cell_type": "code", - "execution_count": 15, - "id": "93a54ef2", - "metadata": { - "scrolled": true - }, + "execution_count": 9, + "id": "cc320585", + "metadata": {}, "outputs": [ { "name": "stderr", @@ -288,6 +554,7 @@ " \n", " \n", " \n", + " calls\n", " duration\n", " dur_perc\n", " \n", @@ -295,99 +562,108 @@ " \n", " \n", " surr_charge_rate(t)\n", - " 0.697034\n", - " 39.238824\n", + " 121\n", + " 0.529899\n", + " 42.448094\n", " \n", " \n", - " inv_return_mth(t)\n", - " 0.338032\n", - " 19.029207\n", + " premium_pp(t)\n", + " 121\n", + " 0.128997\n", + " 10.333420\n", " \n", " \n", - " premium_pp(t)\n", - " 0.146663\n", - " 8.256274\n", + " claim_pp(t, kind)\n", + " 242\n", + " 0.096036\n", + " 7.693036\n", " \n", " \n", - " pols_new_biz(t)\n", - " 0.069975\n", - " 3.939195\n", + " inv_return_mth(t)\n", + " 121\n", + " 0.040032\n", + " 3.206834\n", " \n", " \n", - " inv_income_pp(t)\n", - " 0.069556\n", - " 3.915600\n", + " av_pp_at(t, timing)\n", + " 485\n", + " 0.036636\n", + " 2.934734\n", " \n", " \n", " claims(t, kind)\n", - " 0.064405\n", - " 3.625641\n", + " 484\n", + " 0.035000\n", + " 2.803736\n", " \n", " \n", - " expenses(t)\n", - " 0.035827\n", - " 2.016841\n", + " pols_new_biz(t)\n", + " 121\n", + " 0.030402\n", + " 2.435416\n", " \n", " \n", - " pols_maturity(t)\n", - " 0.032865\n", - " 1.850092\n", + " pols_if_at(t, timing)\n", + " 364\n", + " 0.030123\n", + " 2.413071\n", " \n", " \n", - " 
claim_pp(t, kind)\n", - " 0.032804\n", - " 1.846696\n", + " inv_income(t)\n", + " 121\n", + " 0.027999\n", + " 2.242901\n", " \n", " \n", - " prem_to_av_pp(t)\n", - " 0.031291\n", - " 1.761523\n", + " pols_lapse(t)\n", + " 121\n", + " 0.027977\n", + " 2.241144\n", " \n", " \n", "\n", "" ], "text/plain": [ - " duration dur_perc\n", - "surr_charge_rate(t) 0.697034 39.238824\n", - "inv_return_mth(t) 0.338032 19.029207\n", - "premium_pp(t) 0.146663 8.256274\n", - "pols_new_biz(t) 0.069975 3.939195\n", - "inv_income_pp(t) 0.069556 3.915600\n", - "claims(t, kind) 0.064405 3.625641\n", - "expenses(t) 0.035827 2.016841\n", - "pols_maturity(t) 0.032865 1.850092\n", - "claim_pp(t, kind) 0.032804 1.846696\n", - "prem_to_av_pp(t) 0.031291 1.761523" + " calls duration dur_perc\n", + "surr_charge_rate(t) 121 0.529899 42.448094\n", + "premium_pp(t) 121 0.128997 10.333420\n", + "claim_pp(t, kind) 242 0.096036 7.693036\n", + "inv_return_mth(t) 121 0.040032 3.206834\n", + "av_pp_at(t, timing) 485 0.036636 2.934734\n", + "claims(t, kind) 484 0.035000 2.803736\n", + "pols_new_biz(t) 121 0.030402 2.435416\n", + "pols_if_at(t, timing) 364 0.030123 2.413071\n", + "inv_income(t) 121 0.027999 2.242901\n", + "pols_lapse(t) 121 0.027977 2.241144" ] }, - "execution_count": 15, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ex1.Projection.scen_size = 10_000\n", - "df1 = get_time_info(ex1)\n", - "df1[['duration', 'dur_perc']].iloc[:10]" + "ex1.clear_all() # Clear the result from the previous run\n", + "get_time_info(ex1).iloc[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "3a435fef", + "metadata": {}, + "source": [ + "The output shows that `surr_charge_rate(t)` consumes the most time, accounting for more than 30% of the total run time. Its formula is shown below.\n", + "\n", + "`surr_charge_rate(t)` represents the surrender charge rates to be applied at time `t`. 
The surrender charge rates are defined by rate ID (such as `type_1`) and duration, and stored in `surr_charge_table` as a DataFrame. " ] }, { "cell_type": "code", - "execution_count": 16, - "id": "00a0f818", - "metadata": { - "scrolled": true - }, + "execution_count": 10, + "id": "cad13801", + "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "UserWarning: call stack trace activated\n", - "UserWarning: call stack trace deactivated\n" - ] - }, { "data": { "text/html": [ @@ -409,98 +685,258 @@ " \n", " \n", " \n", + " type_1\n", + " type_2\n", + " type_3\n", + " \n", + " \n", " duration\n", - " dur_perc\n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " premium_pp(t)\n", - " 0.033878\n", - " 10.744304\n", + " 0\n", + " 0.10\n", + " 0.08\n", + " 0.05\n", " \n", " \n", - " mort_rate_mth(t)\n", - " 0.032263\n", - " 10.231875\n", + " 1\n", + " 0.09\n", + " 0.07\n", + " 0.04\n", " \n", " \n", - " surr_charge_rate(t)\n", - " 0.024496\n", - " 7.768857\n", + " 2\n", + " 0.08\n", + " 0.06\n", + " 0.03\n", " \n", " \n", - " duration(t)\n", - " 0.019624\n", - " 6.223781\n", + " 3\n", + " 0.07\n", + " 0.05\n", + " 0.02\n", " \n", " \n", - " inv_return_table()\n", - " 0.018503\n", - " 5.868249\n", + " 4\n", + " 0.06\n", + " 0.04\n", + " 0.01\n", " \n", " \n", - " pv_claims(kind)\n", - " 0.018216\n", - " 5.776984\n", + " 5\n", + " 0.05\n", + " 0.03\n", + " 0.00\n", " \n", " \n", - " claims(t, kind)\n", - " 0.018125\n", - " 5.748327\n", + " 6\n", + " 0.04\n", + " 0.02\n", + " 0.00\n", " \n", " \n", - " av_at(t, timing)\n", - " 0.016349\n", - " 5.185010\n", + " 7\n", + " 0.03\n", + " 0.01\n", + " 0.00\n", " \n", " \n", - " prem_to_av_pp(t)\n", - " 0.015626\n", - " 4.955751\n", + " 8\n", + " 0.02\n", + " 0.00\n", + " 0.00\n", " \n", " \n", - " pols_new_biz(t)\n", - " 0.015588\n", - " 4.943653\n", + " 9\n", + " 0.01\n", + " 0.00\n", + " 0.00\n", + " \n", + " \n", + " 10\n", + " 0.00\n", + " 0.00\n", + " 0.00\n", " \n", " \n", 
"\n", "" ], "text/plain": [ - " duration dur_perc\n", - "premium_pp(t) 0.033878 10.744304\n", - "mort_rate_mth(t) 0.032263 10.231875\n", - "surr_charge_rate(t) 0.024496 7.768857\n", - "duration(t) 0.019624 6.223781\n", - "inv_return_table() 0.018503 5.868249\n", - "pv_claims(kind) 0.018216 5.776984\n", - "claims(t, kind) 0.018125 5.748327\n", - "av_at(t, timing) 0.016349 5.185010\n", - "prem_to_av_pp(t) 0.015626 4.955751\n", - "pols_new_biz(t) 0.015588 4.943653" + " type_1 type_2 type_3\n", + "duration \n", + "0 0.10 0.08 0.05\n", + "1 0.09 0.07 0.04\n", + "2 0.08 0.06 0.03\n", + "3 0.07 0.05 0.02\n", + "4 0.06 0.04 0.01\n", + "5 0.05 0.03 0.00\n", + "6 0.04 0.02 0.00\n", + "7 0.03 0.01 0.00\n", + "8 0.02 0.00 0.00\n", + "9 0.01 0.00 0.00\n", + "10 0.00 0.00 0.00" ] }, - "execution_count": 16, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ex4.Projection.scen_size = 10_000\n", - "df4 = get_time_info(ex4)\n", - "df4[['duration', 'dur_perc']].iloc[:10]" + "ex1.Projection.surr_charge_table" + ] + }, + { + "cell_type": "markdown", + "id": "bd219f38", + "metadata": {}, + "source": [ + "`surr_charge_table_stacked()` transforms the DataFrame into a Series by combining the row and column indexes of the DataFrame into a MultiIndex. 
" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "63b6dff5", - "metadata": { - "scrolled": true - }, + "execution_count": 11, + "id": "423d7635", + "metadata": {}, "outputs": [ + { + "data": { + "text/plain": [ + " duration\n", + "type_1 0 0.10\n", + " 1 0.09\n", + " 2 0.08\n", + " 3 0.07\n", + " 4 0.06\n", + "dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex1.Projection.surr_charge_table_stacked().head()" + ] + }, + { + "cell_type": "markdown", + "id": "13981e8f", + "metadata": {}, + "source": [ + "`surr_charge_rate(t)` looks up `surr_charge_table_stacked()` using a MultiIndex key, which is created from `surr_charge_id()` and `duration(t)` and other cells as in the formula definition below.\n", + "\n", + "```python\n", + "def surr_charge_rate(t):\n", + " idx = pd.MultiIndex.from_arrays(\n", + " [has_surr_charge() * surr_charge_id(),\n", + " np.minimum(duration(t), surr_charge_max_idx())])\n", + "\n", + " return surr_charge_table_stacked().reindex(idx, fill_value=0).set_axis(\n", + " model_point().index, inplace=False)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5b35e2ac", + "metadata": {}, + "source": [ + "Many pandas operations are involved in the formula. 
We can perform the same operations at a much lower cost by rewriting the formula and other relevant formulas to use numpy arrays instead of DataFrames or Series.\n", + "Based on this approach, `surr_charge_rate(t)` can be written as:\n", + "\n", + "```python\n", + "def surr_charge_rate(t):\n", + " ind_row = np.minimum(duration(t), surr_charge_max_idx())\n", + " return surr_charge_table.values.flat[\n", + " surr_charge_table_column() + ind_row * len(surr_charge_table.columns)]\n", + "```\n", + "\n", + "where `surr_charge_table_column()` is a newly introduced cell and is defined as follows:\n", + "\n", + "```python\n", + "def surr_charge_table_column():\n", + " return surr_charge_table.columns.searchsorted(\n", + " surr_charge_id(), side='right') - 1\n", + "\n", + "```\n", + "\n", + "The new `surr_charge_rate(t)` returns a numpy array for each `t`, instead of a Series. In the same way, we can make other cells return numpy arrays instead of Series or DataFrames. `CashValue_ME_EX4` included in the library is the model to which all the changes are applied.\n", + "\n", + "The linked page below shows the entire comparison of before and after the changes. \n", + "\n", + "https://www.diffchecker.com/iseYbXUD/\n", + "\n", + "You can see that the expression for the returned object has `.values` at the end in many of the changed formulas. 
The `.values` property on a DataFrame or a Series is for returning the contained values as a numpy array instead of the DataFrame or the Series.\n", + "\n", + "Now let's check the speed of the optimized model.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e9caf8ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.23962179999999922" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex4 = mx.read_model('CashValue_ME_EX4') \n", + "timeit.timeit('ex4.Projection.result_pv()', globals=globals(), number=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2053f74a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "44386401.300826035" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex4.Projection.result_pv()['Net Cashflow'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c7fb56da", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UserWarning: call stack trace activated\n", + "UserWarning: call stack trace deactivated\n" + ] + }, { "data": { "text/html": [ @@ -522,64 +958,98 @@ " \n", " \n", " \n", - " df1\n", - " df4\n", + " calls\n", + " duration\n", + " dur_perc\n", " \n", " \n", " \n", " \n", + " pols_lapse(t)\n", + " 121\n", + " 0.037503\n", + " 13.435736\n", + " \n", + " \n", + " premium_pp(t)\n", + " 121\n", + " 0.023004\n", + " 8.241311\n", + " \n", + " \n", " surr_charge_rate(t)\n", - " 0.697034\n", - " 0.024496\n", + " 121\n", + " 0.023000\n", + " 8.240116\n", " \n", " \n", - " inv_return_mth(t)\n", - " 0.338032\n", - " 0.000000\n", + " av_pp_at(t, timing)\n", + " 485\n", + " 0.018002\n", + " 6.449362\n", " \n", " \n", - " premium_pp(t)\n", - " 0.146663\n", - " 0.033878\n", + " claims(t, kind)\n", + " 484\n", + " 0.014502\n", + " 5.195450\n", " \n", " \n", - " 
pols_new_biz(t)\n", - " 0.069975\n", - " 0.015588\n", + " pols_if_at(t, timing)\n", + " 364\n", + " 0.011003\n", + " 3.941879\n", " \n", " \n", - " inv_income_pp(t)\n", - " 0.069556\n", - " 0.015507\n", + " pv_claims(kind)\n", + " 4\n", + " 0.010003\n", + " 3.583729\n", " \n", " \n", - " claims(t, kind)\n", - " 0.064405\n", - " 0.018125\n", + " lapse_rate(t)\n", + " 121\n", + " 0.009001\n", + " 3.224894\n", + " \n", + " \n", + " expenses(t)\n", + " 121\n", + " 0.008003\n", + " 2.867000\n", + " \n", + " \n", + " inv_income(t)\n", + " 121\n", + " 0.007999\n", + " 2.865804\n", " \n", " \n", "\n", "" ], "text/plain": [ - " df1 df4\n", - "surr_charge_rate(t) 0.697034 0.024496\n", - "inv_return_mth(t) 0.338032 0.000000\n", - "premium_pp(t) 0.146663 0.033878\n", - "pols_new_biz(t) 0.069975 0.015588\n", - "inv_income_pp(t) 0.069556 0.015507\n", - "claims(t, kind) 0.064405 0.018125" + " calls duration dur_perc\n", + "pols_lapse(t) 121 0.037503 13.435736\n", + "premium_pp(t) 121 0.023004 8.241311\n", + "surr_charge_rate(t) 121 0.023000 8.240116\n", + "av_pp_at(t, timing) 485 0.018002 6.449362\n", + "claims(t, kind) 484 0.014502 5.195450\n", + "pols_if_at(t, timing) 364 0.011003 3.941879\n", + "pv_claims(kind) 4 0.010003 3.583729\n", + "lapse_rate(t) 121 0.009001 3.224894\n", + "expenses(t) 121 0.008003 2.867000\n", + "inv_income(t) 121 0.007999 2.865804" ] }, - "execution_count": 17, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = pd.concat([df1[['duration']], df4[['duration']]], axis=1)\n", - "df.columns = ['df1', 'df4']\n", - "df.loc[(df['df1'] > 0.05) | (df['df4'] > 0.05)]" + "ex4.clear_all() # Clear the result from the previous run\n", + "get_time_info(ex4).iloc[:10]" ] } ],