From b4257afc5983586dd8ff2ae37bc11a5e6be586bb Mon Sep 17 00:00:00 2001
From: ethantenison <tenison.ethan@gmail.com>
Date: Sun, 30 Oct 2022 18:35:47 -0500
Subject: [PATCH 1/2] Optimizing outputs from a multioutput regression problem
 with NSGA-III

---
 examples/ga/nsga3_realdata.py | 156 ++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 examples/ga/nsga3_realdata.py

diff --git a/examples/ga/nsga3_realdata.py b/examples/ga/nsga3_realdata.py
new file mode 100644
index 000000000..e50db645c
--- /dev/null
+++ b/examples/ga/nsga3_realdata.py
@@ -0,0 +1,156 @@
+from math import factorial
+import random
+
+import matplotlib.pyplot as plt
+import numpy
+import pymop.factory
+
+from deap import algorithms
+from deap import base
+from deap.benchmarks.tools import igd
+from deap import creator
+from deap import tools
+
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor
+
+#This is in reference to issue number #667.
+# How do you use NSGA3 with real data? I'm trying to optimize the outputs of a multioutput regression problem, but
+#based on the examples I'm confused where the data would actually go? Especially because all of the values are already
+# known. It's not an equation, so I'm confused about how that would change the problem definition.
+# Do you normalize your outputs and include them as reference points?
+
+
+
+###########################################Synthetic Outputs######################################
+X, y = make_regression(n_samples=1000, n_features=10, n_targets=3, random_state=1, noise=0.5)
+xtrain, xtest, ytrain, ytest=train_test_split(X, y, test_size=0.8)
+
+model = MLPRegressor(max_iter=2000)
+model.fit(xtrain, ytrain)
+
+pred = model.predict(xtest) #<-  How would you optimize/minimize the predictions?
+
+###################################################################################################
+
+# Problem definition
+PROBLEM = "dtlz2"
+NOBJ = 3
+K = 10
+NDIM = NOBJ + K - 1
+P = 12
+H = factorial(NOBJ + P - 1) / (factorial(P) * factorial(NOBJ - 1))
+BOUND_LOW, BOUND_UP = 0.0, 1.0
+problem = pymop.factory.get_problem(PROBLEM, n_var=NDIM, n_obj=NOBJ)
+##
+
+# Algorithm parameters
+MU = int(H + (4 - H % 4))
+NGEN = 400
+CXPB = 1.0
+MUTPB = 1.0
+##
+
+# Create uniform reference point
+ref_points = tools.uniform_reference_points(NOBJ, P)
+
+# Create classes
+creator.create("FitnessMin", base.Fitness, weights=(-1.0,) * NOBJ)
+creator.create("Individual", list, fitness=creator.FitnessMin)
+##
+
+
+# Toolbox initialization
+def uniform(low, up, size=None):
+    try:
+        return [random.uniform(a, b) for a, b in zip(low, up)]
+    except TypeError:
+        return [random.uniform(a, b) for a, b in zip([low] * size, [up] * size)]
+
+toolbox = base.Toolbox()
+toolbox.register("attr_float", uniform, BOUND_LOW, BOUND_UP, NDIM)
+toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_float)
+toolbox.register("population", tools.initRepeat, list, toolbox.individual)
+
+toolbox.register("evaluate", problem.evaluate, return_values_of=["F"])
+toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=BOUND_LOW, up=BOUND_UP, eta=30.0)
+toolbox.register("mutate", tools.mutPolynomialBounded, low=BOUND_LOW, up=BOUND_UP, eta=20.0, indpb=1.0/NDIM)
+toolbox.register("select", tools.selNSGA3, ref_points=ref_points)
+##
+
+
+def main(seed=None):
+    random.seed(seed)
+
+    # Initialize statistics object
+    stats = tools.Statistics(lambda ind: ind.fitness.values)
+    stats.register("avg", numpy.mean, axis=0)
+    stats.register("std", numpy.std, axis=0)
+    stats.register("min", numpy.min, axis=0)
+    stats.register("max", numpy.max, axis=0)
+
+    logbook = tools.Logbook()
+    logbook.header = "gen", "evals", "std", "min", "avg", "max"
+
+    pop = toolbox.population(n=MU)
+
+    # Evaluate the individuals with an invalid fitness
+    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
+    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
+    for ind, fit in zip(invalid_ind, fitnesses):
+        ind.fitness.values = fit
+
+    # Compile statistics about the population
+    record = stats.compile(pop)
+    logbook.record(gen=0, evals=len(invalid_ind), **record)
+    print(logbook.stream)
+
+    # Begin the generational process
+    for gen in range(1, NGEN):
+        offspring = algorithms.varAnd(pop, toolbox, CXPB, MUTPB)
+
+        # Evaluate the individuals with an invalid fitness
+        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
+        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
+        for ind, fit in zip(invalid_ind, fitnesses):
+            ind.fitness.values = fit
+
+        # Select the next generation population from parents and offspring
+        pop = toolbox.select(pop + offspring, MU)
+
+        # Compile statistics about the new population
+        record = stats.compile(pop)
+        logbook.record(gen=gen, evals=len(invalid_ind), **record)
+        print(logbook.stream)
+
+    return pop, logbook
+
+
+if __name__ == "__main__":
+    pop, stats = main()
+    pop_fit = numpy.array([ind.fitness.values for ind in pop])
+
+    pf = problem.pareto_front(ref_points)
+    print(igd(pop_fit, pf))
+
+    import matplotlib.pyplot as plt
+    import mpl_toolkits.mplot3d as Axes3d
+
+    fig = plt.figure(figsize=(7, 7))
+    ax = fig.add_subplot(111, projection="3d")
+
+    p = numpy.array([ind.fitness.values for ind in pop])
+    ax.scatter(p[:, 0], p[:, 1], p[:, 2], marker="o", s=24, label="Final Population")
+
+    ax.scatter(pf[:, 0], pf[:, 1], pf[:, 2], marker="x", c="k", s=32, label="Ideal Pareto Front")
+
+    ref_points = tools.uniform_reference_points(NOBJ, P)
+
+    ax.scatter(ref_points[:, 0], ref_points[:, 1], ref_points[:, 2], marker="o", s=24, label="Reference Points")
+
+    ax.view_init(elev=11, azim=-25)
+    ax.autoscale(tight=True)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig("nsga3.png")
\ No newline at end of file

From cbdde12ada8063eb2ed05a1e13921cd13e26d1a3 Mon Sep 17 00:00:00 2001
From: ethantenison <tenison.ethan@gmail.com>
Date: Sun, 30 Oct 2022 18:37:04 -0500
Subject: [PATCH 2/2] Optimizing outputs from a multioutput regression problem
 with NSGA-III

---
 examples/ga/{nsga3_realdata.py => nsga3_optimize_outputs_reg.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename examples/ga/{nsga3_realdata.py => nsga3_optimize_outputs_reg.py} (100%)

diff --git a/examples/ga/nsga3_realdata.py b/examples/ga/nsga3_optimize_outputs_reg.py
similarity index 100%
rename from examples/ga/nsga3_realdata.py
rename to examples/ga/nsga3_optimize_outputs_reg.py