-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfeature_selection.py
More file actions
95 lines (82 loc) · 3.89 KB
/
feature_selection.py
File metadata and controls
95 lines (82 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
""" @author: Sam Parry u1008557 """
import numpy as np
from numpy import *
import pandas as pd
from time import time
from bingo.evaluation.evaluation import Evaluation
from bingo.evolutionary_algorithms.age_fitness import AgeFitnessEA
from bingo.evolutionary_optimizers.island import Island
from bingo.local_optimizers.continuous_local_opt import ContinuousLocalOptimization
from bingo.symbolic_regression.agraph.generator import AGraphGenerator
from bingo.symbolic_regression.agraph.component_generator import ComponentGenerator
from bingo.symbolic_regression.agraph.crossover import AGraphCrossover
from bingo.symbolic_regression.agraph.mutation import AGraphMutation
from bingo.symbolic_regression.explicit_regression import ExplicitTrainingData
from bingo.symbolic_regression.explicit_regression import ExplicitRegression
from bingo.stats.pareto_front import ParetoFront
# Global parameters
POP_SIZE = 300
STACK_SIZE = 20 # low stack size for quick runs
MUTATION_PROBABILITY = 0.4
CROSSOVER_PROBABILITY = 0.4
MAX_GENS = 1_000 # low gens. No need for large number
FIT_TOL = 1e-3
use_simplification = True
regression_metric = 'rmse'
clo_algorithm = 'lm'
# If I want a scalar fitness -> BFGS
# If I want a vectorized fitness -> lm (faster)
def agraph_similarity(ag_1, ag_2):
"""a similarity metric between agraphs"""
return ag_1.fitness == ag_2.fitness and ag_1.get_complexity() == ag_2.get_complexity()
def get_generators(x_data, stack_size: int, use_simplification: bool):
"""
Create and return the agraph, crossover, and mutation generators.
:param x_data: numpy array of training data.
:param stack_size: Maximum stack size for AGraph.
:param use_simplification: Use simplification in AGraphGenerator.
:return: agraph_gen, crossover, mutation
"""
component_generator = ComponentGenerator(x_data.shape[1], constant_probability=0.1, num_initial_load_statements=1)
component_generator.add_operator("+")
component_generator.add_operator("-")
component_generator.add_operator("*")
component_generator.add_operator("/")
component_generator.add_operator("cosh")
component_generator.add_operator("sinh")
component_generator.add_operator("exp")
component_generator.add_operator("log")
component_generator.add_operator("sqrt")
agraph_gen = AGraphGenerator(stack_size, component_generator=component_generator,use_simplification=use_simplification)
crossover = AGraphCrossover()
mutation = AGraphMutation(component_generator)
return agraph_gen, crossover, mutation
def print_pareto_front(pareto_front):
"""
Print the members of the pareto front in sympy format.
"""
print(" FITNESS COMPLEXITY EQUATION")
for member in pareto_front:
eq = member.get_formatted_string("sympy")
print("%.3e " % member.fitness, member.get_complexity(), " f(X_0) =", eq)
def main(X,y):
# add super feature(s) here
# Agraph generation/variation
agraph_gen, crossover, mutation = get_generators(X, STACK_SIZE, use_simplification)
# Explicit evaluation & CLO
training_data = ExplicitTrainingData(X, y)
fitness = ExplicitRegression(training_data=training_data, metric=regression_metric)
local_opt_fitness = ContinuousLocalOptimization(fitness, algorithm=clo_algorithm)
evaluator = Evaluation(local_opt_fitness)
# Evolution
ea = AgeFitnessEA(evaluator, agraph_gen, crossover, mutation,
CROSSOVER_PROBABILITY, MUTATION_PROBABILITY, POP_SIZE)
pareto_front = ParetoFront(secondary_key=lambda ag: ag.get_complexity(),
similarity_function=agraph_similarity)
#t = time()
island = Island(ea, agraph_gen, POP_SIZE, hall_of_fame=pareto_front)
island.evolve_until_convergence(max_generations=MAX_GENS, fitness_threshold=FIT_TOL)
#print(f'Elapsed Time: {round((time() - t) / 60, 2)}min')
print_pareto_front(pareto_front)
if __name__ == '__main__':
main()