遗传算法属于一种优化算法。
如果你有一个待优化函数,可以考虑次算法。假设你有一个变量x,通过某个函数可以求出对应的y,那么你通过预设的x可求出y_pred,y_pred差距与你需要的y当然越接近越好,这就需要引入适应度(fitness)的概念。假设
fitness = 1/(1+ads(y_pred - y)),那么误差越小,适应度越大,即该个体越易于存活。
设计该算法的思路如下:
(1)初始化种群,即在我需要的区间如[-100,100]内random一堆初始个体[x1,x2,x3...],这些个体是10进制形式的,为了后面的交叉与变异我们不妨将其转化为二进制形式。那么现在的问题是二进制取多少位合适呢?即编码(code)的长度是多少呢?
这就涉及一些信号方面的知识,比如两位的二进制表示的最大值是3(11),可以将区间化为4分,那么每一份区间range长度range/4,我们只需要让range/n小于我们定义的精度即可。n是二进制需要表示的最大,可以反解出二进制位数 。
(2)我们需要编写编码与解码函数。即code:将x1,x2...化为二进制,decode:在交叉变异后重新得到十进制数,用于计算fitness。
(3)交叉后变异函数编写都很简单,random一个point,指定两个x在point位置进行切片交换即是交叉。变异也是random一个point,让其值0变为1,1变为0。
(4)得到交叉变异后的个体,需要计算fitness进行种群淘汰,保留fitness最高的一部分种群。
(5)将最优的个体继续上面的操作,直到你定义的iteration结束为止。
不说了,上代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
|
import numpy as np
import pandas as pd
import random
from scipy.optimize import fsolve
import matplotlib.pyplot as plt
import heapq
from sklearn.model_selection import train_test_split
from tkinter import _flatten
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.decomposition import PCA
from matplotlib import rcParams
# 求染色体长度
def getEncodeLength(decisionvariables, delta):
# 将每个变量的编码长度放入数组
lengths = []
for decisionvar in decisionvariables:
uper = decisionvar[ 1 ]
low = decisionvar[ 0 ]
# res()返回一个数组
res = fsolve( lambda x: ((uper - low) / delta - 2 * * x + 1 ), 30 )
# ceil()向上取整
length = int (np.ceil(res[ 0 ]))
lengths.append(length)
# print("染色体长度:", lengths)
return lengths
# 随机生成初始化种群
def getinitialPopulation(length, populationSize):
chromsomes = np.zeros((populationSize, length), dtype = np. int )
for popusize in range (populationSize):
# np.random.randit()产生[0,2)之间的随机整数,第三个参数表示随机数的数量
chromsomes[popusize, :] = np.random.randint( 0 , 2 , length)
return chromsomes
# 染色体解码得到表现形的解
def getDecode(population, encodelength, decisionvariables, delta):
# 得到population中有几个元素
populationsize = population.shape[ 0 ]
length = len (encodelength)
decodeVariables = np.zeros((populationsize, length), dtype = np. float )
# 将染色体拆分添加到解码数组decodeVariables中
for i, populationchild in enumerate (population):
# 设置起始点
start = 0
for j, lengthchild in enumerate (encodelength):
power = lengthchild - 1
decimal = 0
start_end = start + lengthchild
for k in range (start, start_end):
# 二进制转为十进制
decimal + = populationchild[k] * ( 2 * * power)
power = power - 1
# 从下一个染色体开始
start = start_end
lower = decisionvariables[j][ 0 ]
uper = decisionvariables[j][ 1 ]
# 转换为表现形
decodevalue = lower + decimal * (uper - lower) / ( 2 * * lengthchild - 1 )
# 将解添加到数组中
decodeVariables[i][j] = decodevalue
return decodeVariables
# 选择新的种群
def selectNewPopulation(decodepopu, cum_probability):
# 获取种群的规模和
m, n = decodepopu.shape
# 初始化新种群
newPopulation = np.zeros((m, n))
for i in range (m):
# 产生一个0到1之间的随机数
randomnum = np.random.random()
# 轮盘赌选择
for j in range (m):
if (randomnum < cum_probability[j]):
newPopulation[i] = decodepopu[j]
break
return newPopulation
# 新种群交叉
def crossNewPopulation(newpopu, prob):
m, n = newpopu.shape
# uint8将数值转换为无符号整型
numbers = np.uint8(m * prob)
# 如果选择的交叉数量为奇数,则数量加1
if numbers % 2 ! = 0 :
numbers = numbers + 1
# 初始化新的交叉种群
updatepopulation = np.zeros((m, n), dtype = np.uint8)
# 随机生成需要交叉的染色体的索引号
index = random.sample( range (m), numbers)
# 不需要交叉的染色体直接复制到新的种群中
for i in range (m):
if not index.__contains__(i):
updatepopulation[i] = newpopu[i]
# 交叉操作
j = 0
while j < numbers:
# 随机生成一个交叉点,np.random.randint()返回的是一个列表
crosspoint = np.random.randint( 0 , n, 1 )
crossPoint = crosspoint[ 0 ]
# a = index[j]
# b = index[j+1]
updatepopulation[index[j]][ 0 :crossPoint] = newpopu[index[j]][ 0 :crossPoint]
updatepopulation[index[j]][crossPoint:] = newpopu[index[j + 1 ]][crossPoint:]
updatepopulation[index[j + 1 ]][ 0 :crossPoint] = newpopu[j + 1 ][ 0 :crossPoint]
updatepopulation[index[j + 1 ]][crossPoint:] = newpopu[index[j]][crossPoint:]
j = j + 2
return updatepopulation
# 变异操作
def mutation(crosspopulation, mutaprob):
# 初始化变异种群
mutationpopu = np.copy(crosspopulation)
m, n = crosspopulation.shape
# 计算需要变异的基因数量
mutationnums = np.uint8(m * n * mutaprob)
# 随机生成变异基因的位置
mutationindex = random.sample( range (m * n), mutationnums)
# 变异操作
for geneindex in mutationindex:
# np.floor()向下取整返回的是float型
row = np.uint8(np.floor(geneindex / n))
colume = geneindex % n
if mutationpopu[row][colume] = = 0 :
mutationpopu[row][colume] = 1
else :
mutationpopu[row][colume] = 0
return mutationpopu
# 找到重新生成的种群中适应度值最大的染色体生成新种群
def findMaxPopulation(population, maxevaluation, maxSize):
#将数组转换为列表
#maxevalue = maxevaluation.flatten()
maxevaluelist = maxevaluation
# 找到前100个适应度最大的染色体的索引
maxIndex = map (maxevaluelist.index, heapq.nlargest(maxSize, maxevaluelist))
index = list (maxIndex)
colume = population.shape[ 1 ]
# 根据索引生成新的种群
maxPopulation = np.zeros((maxSize, colume))
i = 0
for ind in index:
maxPopulation[i] = population[ind]
i = i + 1
return maxPopulation
# 得到每个个体的适应度值及累计概率
def getFitnessValue(decode,x_train,y_train):
# 得到种群的规模和决策变量的个数
popusize, decisionvar = decode.shape
fitnessValue = []
for j in range ( len (decode)):
W1 = decode[j][ 0 : 20 ].reshape( 4 , 5 )
V1 = decode[j][ 20 : 25 ].T
W2 = decode[j][ 25 : 45 ].reshape( 5 , 4 )
V2 = decode[j][ 45 :].T
error_all = []
for i in range ( len (x_train)):
#get values of hidde layer
X2 = sigmoid(x_train[i].T.dot(W1) + V1)
#get values of prediction y
Y_hat = sigmoid(X2.T.dot(W2) + V2)
#get error when input dimension is i
error = sum ( abs (Y_hat - y_train[i]))
error_all.append(error)
#get fitness when W and V is j
fitnessValue.append( 1 / ( 1 + sum (error_all)))
# 得到每个个体被选择的概率
probability = fitnessValue / np. sum (fitnessValue)
# 得到每个染色体被选中的累积概率,用于轮盘赌算子使用
cum_probability = np.cumsum(probability)
return fitnessValue, cum_probability
def getFitnessValue_accuracy(decode,x_train,y_train):
# 得到种群的规模和决策变量的个数
popusize, decisionvar = decode.shape
fitnessValue = []
for j in range ( len (decode)):
W1 = decode[j][ 0 : 20 ].reshape( 4 , 5 )
V1 = decode[j][ 20 : 25 ].T
W2 = decode[j][ 25 : 45 ].reshape( 5 , 4 )
V2 = decode[j][ 45 :].T
accuracy = []
for i in range ( len (x_train)):
#get values of hidde layer
X2 = sigmoid(x_train[i].T.dot(W1) + V1)
#get values of prediction y
Y_hat = sigmoid(X2.T.dot(W2) + V2)
#get error when input dimension is i
accuracy.append( sum ( abs (np. round (Y_hat) - y_train[i])))
fitnessValue.append( sum ([m = = 0 for m in accuracy]) / len (accuracy))
# 得到每个个体被选择的概率
probability = fitnessValue / np. sum (fitnessValue)
# 得到每个染色体被选中的累积概率,用于轮盘赌算子使用
cum_probability = np.cumsum(probability)
return fitnessValue, cum_probability
def getXY():
# 要打开的文件名
data_set = pd.read_csv( 'all-bp.csv' , header = None )
# 取出“特征”和“标签”,并做了转置,将列转置为行
X_minMax1 = data_set.iloc[:, 0 : 12 ].values
# 前12列是特征
min_max_scaler = preprocessing.MinMaxScaler()
X_minMax = min_max_scaler.fit_transform(X_minMax1) # 0-1 range
transfer = PCA(n_components = 0.9 )
data1 = transfer.fit_transform(X_minMax)
#print('PCA processed shape:',data1.shape)
X = data1
Y = data_set.iloc[ : , 12 : 16 ].values # 后3列是标签
# 分训练和测试集
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3 )
return x_train, x_test, y_train, y_test
def sigmoid(z):
return 1 / ( 1 + np.exp( - z))
|
上面的计算适应度函数需要自己更具实际情况调整。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
optimalvalue = []
optimalvariables = []
# 两个决策变量的上下界,多维数组之间必须加逗号
decisionVariables = [[ - 100 , 100 ]] * 49
# 精度
delta = 0.001
# 获取染色体长度
EncodeLength = getEncodeLength(decisionVariables, delta)
# 种群数量
initialPopuSize = 100
# 初始生成100个种群,20,5,20,4分别对用W1,V1,W2,V2
population = getinitialPopulation( sum (EncodeLength), initialPopuSize)
print ( "polpupation.shape:" ,population.shape)
# 最大进化代数
maxgeneration = 4000
# 交叉概率
prob = 0.8
# 变异概率
mutationprob = 0.5
# 新生成的种群数量
maxPopuSize = 30
x_train, x_test, y_train, y_test = getXY()
for generation in range (maxgeneration):
# 对种群解码得到表现形
print (generation)
decode = getDecode(population, EncodeLength, decisionVariables, delta)
#print('the shape of decode:',decode.shape
# 得到适应度值和累计概率值
evaluation, cum_proba = getFitnessValue_accuracy(decode,x_train,y_train)
# 选择新的种群
newpopulations = selectNewPopulation(population, cum_proba)
# 新种群交叉
crossPopulations = crossNewPopulation(newpopulations, prob)
# 变异操作
mutationpopulation = mutation(crossPopulations, mutationprob)
# 将父母和子女合并为新的种群
totalpopulation = np.vstack((population, mutationpopulation))
# 最终解码
final_decode = getDecode(totalpopulation, EncodeLength, decisionVariables, delta)
# 适应度评估
final_evaluation, final_cumprob = getFitnessValue_accuracy(final_decode,x_train,y_train)
#选出适应度最大的100个重新生成种群
population = findMaxPopulation(totalpopulation, final_evaluation, maxPopuSize)
# 找到本轮中适应度最大的值
optimalvalue.append(np. max (final_evaluation))
index = np.where(final_evaluation = = max (final_evaluation))
optimalvariables.append( list (final_decode[index[ 0 ][ 0 ]]))
|
1
2
3
4
5
6
7
8
9
10
11
12
|
fig = plt.figure(dpi = 160 ,figsize = ( 5 , 4 ))
config = {
"font.family" : "serif" , #serif
"font.size" : 10 ,
"mathtext.fontset" : 'stix' ,
}
rcParams.update(config)
plt.plot(np.arange( len (optimalvalue)), optimalvalue, color = "y" , lw = 0.8 , ls = '-' , marker = 'o' , ms = 8 )
# 图例设置
plt.xlabel( 'Iteration' )
plt.ylabel( 'Accuracy' )
plt.show()
|
以上就是python实现高效的遗传算法的详细内容,更多关于python遗传算法的资料请关注服务器之家其它相关文章!
原文链接:https://blog.csdn.net/qq_40819197/article/details/115423902