Learning objectives:
Use non-linear units such as ReLU to improve model performance, build a deep neural network, and implement an easy-to-use neural network class.
Functions to define:
import numpy as np

def initialize_parameters_deep(layer_dims):  # layer_dims is a list with the number of units in each layer
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)  # number of layers, including the input layer
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
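A quick shape check (the layer sizes below are made up just for illustration):

params = initialize_parameters_deep([5, 4, 1])  # hypothetical sizes: 5 inputs, 4 hidden units, 1 output
print(params['W1'].shape, params['b1'].shape)   # (4, 5) (4, 1)
print(params['W2'].shape, params['b2'].shape)   # (1, 4) (1, 1)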
-------------------------------------------------------
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)  # cached for the backward pass
    return Z, cache
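This is the linear part of one layer's forward pass: $Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$, with $A^{[0]} = X$; the cache $(A^{[l-1]}, W^{[l]}, b^{[l]})$ is reused when computing the gradients.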
-----------------------------------------------------
def linear_activation_forward(A_prev, W, b, activation):
    if activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
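sigmoid and relu are not defined in these notes; in the assignment they come from a helper module (dnn_utils). A minimal sketch, assuming each returns the activation together with Z as its activation cache:

def sigmoid(Z):
    # element-wise sigmoid; Z is returned as the activation cache
    A = 1 / (1 + np.exp(-Z))
    return A, Z

def relu(Z):
    # element-wise ReLU; Z is returned as the activation cache
    A = np.maximum(0, Z)
    return A, Z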
--------------------------------------------------------
def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the network
    for l in range(1, L):  # hidden layers use ReLU
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation='relu')
        caches.append(cache)
    # output layer uses sigmoid
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation='sigmoid')
    caches.append(cache)
    return AL, caches
------------------------------------------------------------
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = - np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))) / m
    cost = np.squeeze(cost)  # make sure the cost is a scalar
    return cost
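This computes the cross-entropy cost $J = -\frac{1}{m}\sum_{i=1}^{m}\left[ y^{(i)} \log a^{[L](i)} + (1 - y^{(i)}) \log\left(1 - a^{[L](i)}\right) \right]$, where $a^{[L](i)}$ is the i-th entry of AL.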
------------------------------------------------------------
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
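Given $dZ^{[l]}$, this returns the three gradients of the linear step: $dW^{[l]} = \frac{1}{m} dZ^{[l]} A^{[l-1]T}$, $db^{[l]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}$, and $dA^{[l-1]} = W^{[l]T} dZ^{[l]}$.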
-------------------------------------------------------------
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
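Likewise, relu_backward and sigmoid_backward are assumed to take dA and the cached Z and return dZ = dA * g'(Z); a minimal sketch consistent with how they are called here:

def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0  # ReLU passes the gradient only where Z > 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)  # sigmoid'(Z) = s * (1 - s)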
--------------------------------------------------------------
def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)  # number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    # derivative of the cross-entropy cost with respect to AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # output (sigmoid) layer
    current_cache = caches[L-1]
    grads['dA' + str(L)], grads['dW' + str(L)], grads['db' + str(L)] = linear_activation_backward(dAL, current_cache, activation='sigmoid')
    # hidden (ReLU) layers; with this indexing, grads['dA' + str(l+1)] holds the gradient w.r.t. A[l]
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads['dA' + str(l+2)], current_cache, activation='relu')
        grads['dA' + str(l+1)] = dA_prev_temp
        grads['dW' + str(l+1)] = dW_temp
        grads['db' + str(l+1)] = db_temp
    return grads
-------------------------------------------------------------------
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(1, L + 1):  # update every layer, including layer L
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters
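The gradient descent update applied to every layer $l$: $W^{[l]} := W^{[l]} - \alpha\, dW^{[l]}$ and $b^{[l]} := b^{[l]} - \alpha\, db^{[l]}$, where $\alpha$ is the learning rate.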
Model:
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)
    for i in range(0, num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)
    return parameters
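A minimal end-to-end sketch on random data (all shapes and layer sizes below are made up for illustration; in the assignment X holds the flattened images and Y the labels):

np.random.seed(0)
X = np.random.rand(20, 100)                     # 20 features, 100 examples (hypothetical)
Y = (np.random.rand(1, 100) > 0.5).astype(int)  # binary labels
layers_dims = [20, 7, 5, 1]                     # two ReLU hidden layers plus a sigmoid output
parameters = L_layer_model(X, Y, layers_dims, num_iterations=500, print_cost=True)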