diff --git a/examples/ga/nsga3_optimize_outputs_reg.py b/examples/ga/nsga3_optimize_outputs_reg.py
new file mode 100644
index 000000000..e50db645c
--- /dev/null
+++ b/examples/ga/nsga3_optimize_outputs_reg.py
@@ -0,0 +1,156 @@
+from math import factorial
+import random
+
+import matplotlib.pyplot as plt
+import numpy
+import pymop.factory
+
+from deap import algorithms
+from deap import base
+from deap.benchmarks.tools import igd
+from deap import creator
+from deap import tools
+
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor
+
+# This example relates to issue #667: how do you use NSGA-III with real data?
+# The goal is to optimize the outputs of a multi-output regression problem,
+# but the existing examples make it unclear where the data would actually go,
+# especially because all of the target values are already known. The problem
+# is not given by an equation, which changes how the evaluation is defined.
+# A related question: do you normalize the outputs and include them as
+# reference points?
+
+
+########################### Synthetic Outputs ###########################
+X, y = make_regression(n_samples=1000, n_features=10, n_targets=3, random_state=1, noise=0.5)
+# Note: test_size=0.8 keeps only 20% of the samples for training.
+xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.8)
+
+model = MLPRegressor(max_iter=2000)
+model.fit(xtrain, ytrain)
+
+pred = model.predict(xtest)  # <- How would you optimize/minimize the predictions?
+##########################################################################
+
+# Problem definition
+PROBLEM = "dtlz2"
+NOBJ = 3
+K = 10
+NDIM = NOBJ + K - 1
+P = 12
+H = factorial(NOBJ + P - 1) / (factorial(P) * factorial(NOBJ - 1))
+BOUND_LOW, BOUND_UP = 0.0, 1.0
+problem = pymop.factory.get_problem(PROBLEM, n_var=NDIM, n_obj=NOBJ)
+##
+
+# Algorithm parameters
+MU = int(H + (4 - H % 4))  # population size: H rounded up to a multiple of 4
+NGEN = 400
+CXPB = 1.0
+MUTPB = 1.0
+##
+
+# Create uniform reference points
+ref_points = tools.uniform_reference_points(NOBJ, P)
+
+# Create classes
+creator.create("FitnessMin", base.Fitness, weights=(-1.0,) * NOBJ)
+creator.create("Individual", list, fitness=creator.FitnessMin)
+##
+
+
+# Toolbox initialization
+def uniform(low, up, size=None):
+    try:
+        return [random.uniform(a, b) for a, b in zip(low, up)]
+    except TypeError:
+        return [random.uniform(a, b) for a, b in zip([low] * size, [up] * size)]
+
+
+toolbox = base.Toolbox()
+toolbox.register("attr_float", uniform, BOUND_LOW, BOUND_UP, NDIM)
+toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_float)
+toolbox.register("population", tools.initRepeat, list, toolbox.individual)
+
+toolbox.register("evaluate", problem.evaluate, return_values_of=["F"])
+toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=BOUND_LOW, up=BOUND_UP, eta=30.0)
+toolbox.register("mutate", tools.mutPolynomialBounded, low=BOUND_LOW, up=BOUND_UP, eta=20.0, indpb=1.0/NDIM)
+toolbox.register("select", tools.selNSGA3, ref_points=ref_points)
+##
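+
+
+# --- Sketch for the issue #667 question above (illustrative, not canonical) ---
+# One way to bring the "real data" in: treat an individual as a candidate
+# input vector for the trained regressor and minimize the model's predicted
+# outputs. This assumes the individual length equals n_features (10 here,
+# whereas NDIM is 12 for DTLZ2) and that the bounds match the scale of X,
+# so it is left commented out instead of replacing the DTLZ2 evaluation.
+#
+# def evaluate_predictions(individual):
+#     """Return the model's predicted targets for one candidate input."""
+#     outputs = model.predict(numpy.asarray(individual).reshape(1, -1))[0]
+#     return tuple(outputs)
+#
+# toolbox.register("evaluate", evaluate_predictions)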
+
+
+def main(seed=None):
+    random.seed(seed)
+
+    # Initialize statistics object
+    stats = tools.Statistics(lambda ind: ind.fitness.values)
+    stats.register("avg", numpy.mean, axis=0)
+    stats.register("std", numpy.std, axis=0)
+    stats.register("min", numpy.min, axis=0)
+    stats.register("max", numpy.max, axis=0)
+
+    logbook = tools.Logbook()
+    logbook.header = "gen", "evals", "std", "min", "avg", "max"
+
+    pop = toolbox.population(n=MU)
+
+    # Evaluate the individuals with an invalid fitness
+    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
+    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
+    for ind, fit in zip(invalid_ind, fitnesses):
+        ind.fitness.values = fit
+
+    # Compile statistics about the population
+    record = stats.compile(pop)
+    logbook.record(gen=0, evals=len(invalid_ind), **record)
+    print(logbook.stream)
+
+    # Begin the generational process
+    for gen in range(1, NGEN):
+        offspring = algorithms.varAnd(pop, toolbox, CXPB, MUTPB)
+
+        # Evaluate the individuals with an invalid fitness
+        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
+        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
+        for ind, fit in zip(invalid_ind, fitnesses):
+            ind.fitness.values = fit
+
+        # Select the next generation population from parents and offspring
+        pop = toolbox.select(pop + offspring, MU)
+
+        # Compile statistics about the new population
+        record = stats.compile(pop)
+        logbook.record(gen=gen, evals=len(invalid_ind), **record)
+        print(logbook.stream)
+
+    return pop, logbook
+
+
+if __name__ == "__main__":
+    pop, logbook = main()
+    pop_fit = numpy.array([ind.fitness.values for ind in pop])
+
+    pf = problem.pareto_front(ref_points)
+    print(igd(pop_fit, pf))
+
+    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, registers the "3d" projection
+
+    fig = plt.figure(figsize=(7, 7))
+    ax = fig.add_subplot(111, projection="3d")
+
+    ax.scatter(pop_fit[:, 0], pop_fit[:, 1], pop_fit[:, 2], marker="o", s=24, label="Final Population")
+
+    ax.scatter(pf[:, 0], pf[:, 1], pf[:, 2], marker="x", c="k", s=32, label="Ideal Pareto Front")
+
+    ax.scatter(ref_points[:, 0], ref_points[:, 1], ref_points[:, 2], marker="o", s=24, label="Reference Points")
+
+    ax.view_init(elev=11, azim=-25)
+    ax.autoscale(tight=True)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig("nsga3.png")
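+
+
+# --- Note on the normalization question in issue #667 (an aside, not DEAP-mandated) ---
+# selNSGA3 follows the NSGA-III procedure, which normalizes the population's
+# objectives internally, so the raw predicted outputs do not have to be
+# rescaled first, and the reference points remain on the unit simplex rather
+# than being built from the outputs. If the objectives live on wildly
+# different scales, one optional preprocessing step is a per-objective
+# min-max rescaling, sketched here:
+#
+# def minmax_scale_objectives(fits):
+#     """Scale an (n, NOBJ) array of objective values to [0, 1] per column."""
+#     fits = numpy.asarray(fits, dtype=float)
+#     span = fits.max(axis=0) - fits.min(axis=0)
+#     return (fits - fits.min(axis=0)) / numpy.where(span == 0.0, 1.0, span)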