Deep Neural Networks

Date: 2021-03-10 06:45:26

Learning objectives:

Use non-linear units such as ReLU to improve model performance, build a deep neural network, and implement an easy-to-use neural network class.

Functions to define:

import numpy as np

def initialize_parameters_deep(layer_dims):  # layer_dims is a list with the number of units in each layer
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)  # number of layers, including the input layer
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
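
A quick sanity check (my own example, not part of the notes): initialize a small network and print the parameter shapes, assuming the function above has been defined.

# Illustrative shape check for initialize_parameters_deep
params = initialize_parameters_deep([5, 4, 3])   # 5 inputs, a hidden layer of 4 units, 3 outputs
for name, value in params.items():
    print(name, value.shape)
# Expected: W1 (4, 5), b1 (4, 1), W2 (3, 4), b2 (3, 1)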

-------------------------------------------------------

def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)  # cached for the backward pass
    return Z, cache
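
linear_forward is the affine step Z = W·A + b, with b broadcast across the m example columns. A tiny worked example with my own numbers:

W = np.array([[1., 2.], [3., 4.]])   # shape (2, 2)
A = np.array([[1., 0.], [0., 1.]])   # two examples stacked as columns
b = np.array([[1.], [1.]])           # broadcast over the columns
Z, cache = linear_forward(A, W, b)
print(Z)   # [[2. 3.]
           #  [4. 5.]]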

-----------------------------------------------------

def linear_activation_forward(A_prev, W, b, activation):
    if activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
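
The sigmoid and relu helpers are not defined in these notes (in the course assignment they come from dnn_utils.py). A minimal sketch that matches the interface used above: each returns the activation A together with a cache (here just Z) for the backward pass.

def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    cache = Z          # the backward pass needs Z
    return A, cache

def relu(Z):
    A = np.maximum(0, Z)
    cache = Z
    return A, cache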

--------------------------------------------------------

def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers with parameters
    for l in range(1, L):  # [LINEAR -> RELU] for layers 1 .. L-1
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation='relu')
        caches.append(cache)
    # output layer: LINEAR -> SIGMOID
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation='sigmoid')
    caches.append(cache)
    return AL, caches
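
An end-to-end forward check (illustrative, assuming the helper sketches above or the course's dnn_utils are available): the output AL should have shape (1, m) with entries between 0 and 1, and there should be one cache per layer.

np.random.seed(2)
X = np.random.randn(5, 10)                      # 5 features, 10 examples
params = initialize_parameters_deep([5, 4, 1])  # one relu hidden layer, sigmoid output
AL, caches = L_model_forward(X, params)
print(AL.shape)      # (1, 10)
print(len(caches))   # 2, one cache per layer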

------------------------------------------------------------

def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL))) / m
    cost = np.squeeze(cost)  # e.g. turns [[17]] into 17
    return cost
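
compute_cost implements the cross-entropy cost J = -(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL)). A quick numeric check with my own numbers: predicting 0.5 everywhere gives log(2) ≈ 0.693, while confident correct predictions give a cost near 0.

Y = np.array([[1, 0, 1]])
print(compute_cost(np.array([[0.5, 0.5, 0.5]]), Y))     # ~0.693
print(compute_cost(np.array([[0.99, 0.01, 0.99]]), Y))  # ~0.01, nearly perfect predictions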

------------------------------------------------------------

def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
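
linear_backward uses the standard formulas dW = np.dot(dZ, A_prev.T) / m, db = np.sum(dZ) / m over the examples, and dA_prev = np.dot(W.T, dZ). A quick shape check (illustrative): each gradient has the same shape as the quantity it differentiates with respect to.

np.random.seed(3)
A_prev, W, b = np.random.randn(4, 10), np.random.randn(3, 4), np.random.randn(3, 1)
dZ = np.random.randn(3, 10)
dA_prev, dW, db = linear_backward(dZ, (A_prev, W, b))
print(dA_prev.shape, dW.shape, db.shape)   # (4, 10) (3, 4) (3, 1), matching A_prev, W, b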

-------------------------------------------------------------

def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
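
relu_backward and sigmoid_backward are likewise assumed to be provided (dnn_utils.py in the course). A minimal sketch consistent with the caches used above, converting dA into dZ:

def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0            # relu gradient: 1 where Z > 0, else 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)   # sigmoid'(Z) = s * (1 - s)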

--------------------------------------------------------------

def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)  # number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    # derivative of the cross-entropy cost with respect to AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # output layer (sigmoid)
    current_cache = caches[L-1]
    grads['dA' + str(L)], grads['dW' + str(L)], grads['db' + str(L)] = linear_activation_backward(dAL, current_cache, activation='sigmoid')
    # hidden layers (relu), from layer L-1 down to 1
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads['dA' + str(l+2)], current_cache, activation='relu')
        grads['dA' + str(l+1)] = dA_prev_temp
        grads['dW' + str(l+1)] = dW_temp
        grads['db' + str(l+1)] = db_temp
    return grads
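
One way to sanity-check L_model_backward (my own check, not from the assignment): every dW/db gradient should have the same shape as the corresponding parameter.

np.random.seed(4)
X = np.random.randn(5, 10)
Y = (np.random.rand(1, 10) > 0.5).astype(float)
params = initialize_parameters_deep([5, 4, 1])
AL, caches = L_model_forward(X, params)
grads = L_model_backward(AL, Y, caches)
for l in range(1, len(caches) + 1):   # gradients must match the parameter shapes
    assert grads['dW' + str(l)].shape == params['W' + str(l)].shape
    assert grads['db' + str(l)].shape == params['b' + str(l)].shape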

-------------------------------------------------------------------

def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(1, L + 1):  # update every layer, including the output layer L
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters
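
One gradient-descent step in isolation (illustrative, my own numbers): with a gradient of +1 on W1 and learning_rate 0.1, W1 should decrease by 0.1.

params = {'W1': np.array([[1.0]]), 'b1': np.array([[0.0]])}
grads = {'dW1': np.array([[1.0]]), 'db1': np.array([[0.0]])}
print(update_parameters(params, grads, learning_rate=0.1)['W1'])   # [[0.9]]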

The model:

def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)
    for i in range(0, num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)
    return parameters
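
Putting it all together on a toy dataset (my own example, not the course's cat data), assuming all of the functions and helper sketches above are defined. The cost printed every 100 iterations should drift downward, although slowly with the small 0.01 initialization; raising learning_rate or num_iterations speeds this up.

np.random.seed(5)
X = np.random.randn(5, 200)                       # toy inputs: 5 features, 200 examples
Y = (X[0:1, :] + X[1:2, :] > 0).astype(float)     # toy labels from a simple rule
parameters = L_layer_model(X, Y, [5, 7, 1], learning_rate=0.0075, num_iterations=2500, print_cost=True)
AL, _ = L_model_forward(X, parameters)
print('train accuracy:', np.mean((AL > 0.5) == Y))   # predictions thresholded at 0.5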