Discrepancy Python and R version - Python version not returning optimal response | LD_AUGLAG & LD_SLSQP #560

NumesSanguis · 2024-08-09T05:15:18Z

Thank you for creating this package.
I'm re-implementing a R version into Python for easier deployment, but run into an issue that they're not returning the same.

I believe the following R code (using nloptr) is equivalent to the Python code, but the Python code is not returning the optimal solution.

I'm using nlopt.LD_AUGLAG for the optimizer and nlopt.LD_SLSQP for the local optimizer.

Output

R

Lower bound: 401.553493538462 401.553493538462 401.553493538462 401.553493538462 401.553493538462 
Upper bound: 4015534.93538462 4015534.93538462 4015534.93538462 4015534.93538462 4015534.93538462 
x0: 401.553493538462 401.553493538462 401.553493538462 401.553493538462 401.553493538462 

Optimal media mix spend: 4015534.93538462 
401.553493538462 
401.553493538462 
1852965.52954886 
1916674.96416339 
245091.33468529 

Optimal media mix response: 56.836128179131 
0.000699712963819784 
9.0294969530361e-09 
19.8435763296485 
26.6673693136553 
10.3244828138338

Python

Lower bound: [401.553493538462, 401.553493538462, 401.553493538462, 401.553493538462, 401.553493538462]
Upper bound: [4015534.93538462, 4015534.93538462, 4015534.93538462, 4015534.93538462, 4015534.93538462]
x0: [401.553493538462, 401.553493538462, 401.553493538462, 401.553493538462, 401.553493538462]

Optimal media mix spend: 4015538.4615766173
401.553493538462
602439.6180150149
1538926.7098591584
1484602.9765656642
389167.6036432416

Optimal media mix response: 48.59968938633828
0.0006997129638197839
1.5008161730954612
15.679020384730313
18.797176881673398
12.621976233875278

R code

Simplified version of the Meta Robyn code.

# Display more digits after the decimal point
options(digits=15)

param_coefs <- c(46.4298759930757, 189.913150925424, 133.59888898357, 43.560285303803, 67.4152756322413)
param_alphas <- c(1.2523346953, 2.5893164892, 1.4620854618, 2.8663548463, 0.52337205453)
param_inflexions <- c(13058289.6634197, 8498256.18987648, 8468125.57387393, 3260125.76148595, 14280449.7428876)
param_carryover_rate <- c(4.590262, 2.182036, 1.384337, 1.994616, 2.219991)
param_weekly_budget <- 4015534.93538462

lb_ratio = c(0.0001, 0.0001, 0.0001, 0.0001, 0.0001)
ub_ratio = c(1.0, 1.0, 1.0, 1.0, 1.0)

lb = lb_ratio * param_weekly_budget
ub = ub_ratio * param_weekly_budget
x0 = lb

cat("Lower bound:", lb, "\n")
cat("Upper bound:", ub, "\n")
cat("x0:", x0, "\n")
cat("\n")


eval_f <- function(X, grad) {
    # grad is not used
    
    objective <- -sum(mapply(
        fx_objective,
        x = X,
        coeff = param_coefs,
        alpha = param_alphas,
        inflexion = param_inflexions,
        carryover_rate = param_carryover_rate,
        SIMPLIFY = TRUE
    ))

    gradient <- c(mapply(
        fx_gradient,
        x = X,
        coeff = param_coefs,
        alpha = param_alphas,
        inflexion = param_inflexions,
        carryover_rate = param_carryover_rate,
        SIMPLIFY = TRUE
    ))

    optm <- list(objective = objective, gradient = gradient)
    return(optm)
}

fx_objective <- function(x, coeff, alpha, inflexion, carryover_rate) {  # x_hist_carryover
    # Adstock scales
    xAdstocked <- x * carryover_rate   # + mean(x_hist_carryover)
    
    # Hill transformation
    xOut <- coeff * sum((1 + inflexion**alpha / xAdstocked**alpha)**-1)
    
    return(xOut)
}

fx_gradient <- function(x, coeff, alpha, inflexion, carryover_rate) {  # x_hist_carryover
    # Adstock scales
    xAdstocked <- x * carryover_rate   # + mean(x_hist_carryover)
    
    # Prevent division by 0
    if (xAdstocked < 1e-40) {
        return(1e-40)
    }
    
    xOut <- -coeff * sum((alpha * (inflexion**alpha) * (xAdstocked**(alpha - 1))) / (xAdstocked**alpha + inflexion**alpha)**2)
    return(xOut)
}

fx_objective.chanel <- function(x, coeff, alpha, inflexion, carryover_rate) {  # x_hist_carryover
    # Adstock scales
    xAdstocked <- x * carryover_rate   # + mean(x_hist_carryover)
    
    xOut <- -coeff * sum((1 + inflexion**alpha / xAdstocked**alpha)**-1)
    return(xOut)
}

eval_g_eq <- function(X, grad) {
    constr <- sum(X) - param_weekly_budget
    grad <- rep(1, length(X))
    return(list(
        "constraints" = constr,
        "jacobian" = grad
    ))
}


# https://astamm.github.io/nloptr/articles/nloptr.html
local_opts <- list(
    "algorithm" = "NLOPT_LD_SLSQP",
    "xtol_rel" = 1.0e-13
)

nlsMod <- nloptr::nloptr(
    x0 = x0,
    eval_f = eval_f,
    eval_g_eq = eval_g_eq,
    lb = lb,
    ub = ub,
    opts = list(
        "algorithm" = "NLOPT_LD_AUGLAG",
        "xtol_rel" = 1.0e-10,
        "maxeval" = 10000,  # maxeval,
        "local_opts" = local_opts
    ),
    grad = NULL
)
optmSpendUnit <- nlsMod$solution

cat("Optimal media mix spend:", sum(optmSpendUnit), "\n")
for (unit in optmSpendUnit) {
  cat(unit, "\n")
}
cat("\n")


optmResponseUnit <- -mapply(
    fx_objective.chanel,
    x = optmSpendUnit,
    coeff = param_coefs,
    alpha = param_alphas,
    inflexion = param_inflexions,
    carryover_rate = param_carryover_rate,
    SIMPLIFY = TRUE
)

cat("Optimal media mix response:", sum(optmResponseUnit), "\n")
for (unit in optmResponseUnit) {
  cat(unit, "\n")
}
cat("\n")

Python code

import numpy as np
import nlopt


param_coefs = np.array([46.4298759930757, 189.913150925424, 133.59888898357, 43.560285303803, 67.4152756322413])
param_alphas = np.array([1.2523346953, 2.5893164892, 1.4620854618, 2.8663548463, 0.52337205453])
param_inflexions = np.array([13058289.6634197, 8498256.18987648, 8468125.57387393, 3260125.76148595, 14280449.7428876])
param_carryover_rate = np.array([4.590262, 2.182036, 1.384337, 1.994616, 2.219991])
param_weekly_budget = 4015534.93538462  # np.array(4015534.93538462)

lb_ratio = np.array([0.0001, 0.0001, 0.0001, 0.0001, 0.0001])
ub_ratio = np.array([1.0, 1.0, 1.0, 1.0, 1.0])

lb = lb_ratio * param_weekly_budget
ub = ub_ratio * param_weekly_budget
x0 = lb

print(f"Lower bound: {list(lb)}")
print(f"Upper bound: {list(ub)}")
print(f"x0: {list(x0)}")
print()

        
def eval_f(X, grad):
    objective = -np.sum(np.fromiter((
        _fx_objective(x, coeff, alpha, inflexion, cr) for x, coeff, alpha, inflexion, cr in
            zip(X, param_coefs, param_alphas, param_inflexions, param_carryover_rate)
    ), dtype=float))

    # https://nlopt.readthedocs.io/en/latest/NLopt_Python_Reference/#assigning-results-in-place
    if grad.size > 0:
        grad[:] = np.array(np.fromiter((
            _fx_gradient(x, coeff, alpha, inflexion, cr) for x, coeff, alpha, inflexion, cr in
                zip(X, param_coefs, param_alphas, param_inflexions, param_carryover_rate)
        ), dtype=np.float64))

    return objective
    
def _fx_objective(x, coeff, alpha, inflexion, carryover_rate):
    # Adstock scales
    xAdstocked = x * carryover_rate

    # Hill transformation
    xOut = coeff * np.sum((1 + inflexion**alpha / xAdstocked**alpha)**-1)

    return xOut

def _fx_gradient(x, coeff, alpha, inflexion, carryover_rate):  # x_hist_carryover
    # Adstock scales
    xAdstocked = x * carryover_rate

    # Prevent division by 0
    if xAdstocked < 1e-40:
        return 1e-40

    xOut = -coeff * np.sum((alpha * (inflexion**alpha) * (xAdstocked**(alpha - 1))) / (xAdstocked**alpha + inflexion**alpha)**2)
    return(xOut)

def eval_g_eq(X, grad):
    constr = np.sum(X) - param_weekly_budget
    
    if grad.size > 0:
        grad[:] = np.repeat(1, len(X))

    return constr

opt = nlopt.opt(nlopt.LD_AUGLAG, len(x0))
opt.set_min_objective(eval_f)
opt.add_equality_constraint(eval_g_eq)  # , tol=0 , 1.0e-10
opt.set_lower_bounds(lb)
opt.set_upper_bounds(ub)
opt.set_xtol_rel(1.0e-10)
opt.set_maxeval(10000)

local_opt = nlopt.opt(nlopt.LD_SLSQP, len(x0))
local_opt.set_xtol_rel(1.0e-13)

opt.set_local_optimizer(local_opt)

optmSpendUnit = opt.optimize(x0)

print(f"Optimal media mix spend: {np.sum(optmSpendUnit)}")
for unit in optmSpendUnit:
    print(unit)
print()


# Budget to response
def fx_objective_chanel(x, coeff, alpha, inflexion, carryover_rate):  # x_hist_carryover
    # Adstock scales
    xAdstocked = x * carryover_rate   # + mean(x_hist_carryover)

    xOut = -coeff * np.sum((1 + inflexion**alpha / xAdstocked**alpha)**-1)
    return xOut

optmResponseUnit = -np.array(np.fromiter((
    fx_objective_chanel(x, coeff, alpha, inflexion, cr) for x, coeff, alpha, inflexion, cr in
        zip(optmSpendUnit, param_coefs, param_alphas, param_inflexions, param_carryover_rate)
), dtype=float))

print(f"Optimal media mix response: {np.sum(optmResponseUnit)}")
for unit in optmResponseUnit:
    print(unit)
print()

Potential reasons

Might it be that there is not mistake in my Python code, but an issue with the algorithms itself, as mentioned in these issues?:

The text was updated successfully, but these errors were encountered:

NumesSanguis changed the title ~~Discrepancy Python and R version - Python version not returning optimal response~~ Discrepancy Python and R version - Python version not returning optimal response | LD_AUGLAG & LD_SLSQP Aug 9, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Discrepancy Python and R version - Python version not returning optimal response | LD_AUGLAG & LD_SLSQP #560

Discrepancy Python and R version - Python version not returning optimal response | LD_AUGLAG & LD_SLSQP #560

NumesSanguis commented Aug 9, 2024

Discrepancy Python and R version - Python version not returning optimal response | LD_AUGLAG & LD_SLSQP #560

Discrepancy Python and R version - Python version not returning optimal response | LD_AUGLAG & LD_SLSQP #560

Comments

NumesSanguis commented Aug 9, 2024

Output

R

Python

R code

Python code

Potential reasons