Python数据处理 PCA/ZCA 白化
课后练习
PCA, PCA whitening and ZCA whitening in 2D
Step 0: Load data
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Load the 2-D toy data and transpose it so that each row is one sample and
# the two columns are the coordinates plotted below.
x = np.loadtxt('pca_2d/pcaData.txt').T

# Raw data as hollow blue circles.  facecolors='none' is the supported way to
# request unfilled markers; an empty colour string (color='') is rejected by
# current matplotlib releases.
plt.scatter(x[:, 0], x[:, 1], marker='o', facecolors='none', edgecolors='b')

# Centre every column on zero; the covariance computed later assumes
# zero-mean data.
x -= np.mean(x, axis=0)

# Centred data as hollow green circles, overlaid on the same axes.
plt.scatter(x[:, 0], x[:, 1], marker='o', facecolors='none', edgecolors='g')
plt.show()

Step 1: Implement PCA
# Covariance of the zero-mean data (rows of x are samples).
cov = x.T.dot(x) / x.shape[0]

# cov is symmetric positive semi-definite, so the SVD doubles as an
# eigendecomposition: the columns of U are the principal directions and S
# holds the matching eigenvalues in descending order.
U, S, V = np.linalg.svd(cov)

# Rotate the data into the PCA basis.  With one sample per row the projection
# is x @ U (each sample dotted with every eigenvector column); x @ U.T would
# mix the coordinates with the wrong basis whenever U is not symmetric.
xRot = x.dot(U)

plt.scatter(xRot[:, 0], xRot[:, 1], marker='o', facecolors='none',
            edgecolors='b')
plt.show()

Step 2: Reduce dimension and replot
# Number of principal components to keep.
k = 1

# Coordinates of every sample along the top-k principal directions.
xReduce = x.dot(U[:, :k])

# Map the reduced coordinates back into the original space.  Multiplying by
# the transpose of the retained eigenvectors is equivalent to zero-padding the
# dropped components and rotating back with the full U.T.
xHat = xReduce.dot(U[:, :k].T)

plt.scatter(xHat[:, 0], xHat[:, 1], marker='o', facecolors='none',
            edgecolors='b')
plt.show()

Step 3: PCA Whitening
# Small regulariser that keeps the division stable for near-zero eigenvalues.
e = 1e-5

# PCA whitening: rotate into the eigenbasis (x @ U for row-sample data) and
# rescale each component by 1 / sqrt(eigenvalue + e) so that every rotated
# feature ends up with roughly unit variance.  S is already a 1-D vector, so
# it broadcasts across the columns without building a diagonal matrix.
xPCAwhite = x.dot(U) / np.sqrt(S + e)

plt.scatter(xPCAwhite[:, 0], xPCAwhite[:, 1], marker='o', facecolors='none',
            edgecolors='b')
plt.show()

Step 4: ZCA Whitening
# ZCA whitening: PCA-whiten the data, then rotate it back into the original
# coordinate system.  For row-sample data that is x @ U @ D @ U.T with
# D = diag(1 / sqrt(S + e)); the final factor must be U.T (the inverse of the
# rotation U), not U.
xZCAwhite = (x.dot(U) / np.sqrt(S + e)).dot(U.T)

plt.scatter(xZCAwhite[:, 0], xZCAwhite[:, 1], marker='o', facecolors='none',
            edgecolors='b')
plt.show()

PCA and Whitening on natural images
Step 0: Prepare data
Step 0a: Load data
from scipy.io import loadmat
# Read the MATLAB file and pull out the image stack stored under 'IMAGESr'.
data = loadmat('pca_exercise/IMAGES_RAW.mat')
imgs = data['IMAGESr']

# Preview the first image of the stack (the last axis indexes images).
first_image = imgs[..., 0]
plt.imshow(first_image, cmap='gray')
plt.show()

# Side length (pixels) of each square patch and the total number to draw.
patch_size = 12
num_patches = 10000

# One flattened patch per row.
patches = np.zeros((num_patches, patch_size * patch_size))

num_imgs = imgs.shape[2]
# Base number of patches taken from each image (loop-invariant).
num_samples = num_patches // num_imgs
# When num_patches is not a multiple of num_imgs, the first `extra` images
# contribute one additional patch so that no row of `patches` is left as zeros.
extra = num_patches % num_imgs

# Exclusive upper bounds for the top-left corner so a patch never runs off
# the image.
max_top = imgs.shape[0] - patch_size + 1
max_left = imgs.shape[1] - patch_size + 1

p = 0  # index of the next row of `patches` to fill
for im in range(num_imgs):
    count = num_samples + (1 if im < extra else 0)
    for _ in range(count):
        # Dedicated names for the corner avoid overwriting the 2-D data
        # array `x` used by the earlier cells.
        top = np.random.randint(max_top)
        left = np.random.randint(max_left)
        sample = imgs[top:top + patch_size, left:left + patch_size, im]
        patches[p, :] = sample.reshape(-1)
        p += 1
def display_patches(samples, num_rows, num_cols, padding_size):
    """Show flattened square patches as a grid of grayscale tiles.

    The global mean of ``samples`` is removed, then every patch is scaled by
    its own largest absolute value so each tile spans at most [-1, 1].  Tiles
    are laid out row by row on a canvas pre-filled with -1, which therefore
    shows up as the border between tiles.

    Args:
        samples: 2-D array with one flattened square patch per row.  The
            caller's array is not modified.
        num_rows: Number of tile rows in the grid.
        num_cols: Number of tile columns in the grid.
        padding_size: Width, in pixels, of the border around every tile.

    Raises:
        ValueError: If the patches are not square or there are more patches
            than grid cells.
    """
    samples = np.asarray(samples, dtype=float)
    num_tiles, num_pixels = samples.shape

    # Infer the tile side from the data instead of relying on a global.
    side = int(round(np.sqrt(num_pixels)))
    if side * side != num_pixels:
        raise ValueError(
            'each row of samples must hold a square patch, got %d values'
            % num_pixels)
    if num_tiles > num_rows * num_cols:
        raise ValueError(
            'got %d patches for a %dx%d grid' % (num_tiles, num_rows, num_cols))

    display_height = num_rows * side + (num_rows + 1) * padding_size
    display_width = num_cols * side + (num_cols + 1) * padding_size
    display_imgs = np.full((display_height, display_width), -1.0)

    # Out-of-place subtraction: never mutate the array passed by the caller.
    centered = samples - np.mean(samples)

    for i, patch in enumerate(centered):
        # Row-major layout: the row index advances once per num_cols tiles.
        row, col = divmod(i, num_cols)
        top = (row + 1) * padding_size + row * side
        left = (col + 1) * padding_size + col * side

        # Per-patch contrast normalisation; a perfectly flat patch is left at
        # zero instead of being divided by zero.
        clim = np.max(np.abs(patch))
        if clim > 0:
            patch = patch / clim
        display_imgs[top:top + side, left:left + side] = patch.reshape(side, side)

    plt.figure(figsize=(10, 10))
    plt.imshow(display_imgs, cmap='gray')
    plt.show()
# Grid geometry for the patch display: 10 x 10 tiles with a 1-pixel border.
num_rows, num_cols = 10, 10
padding_size = 1

# Draw one random patch index per grid cell (indices may repeat).
grid_cells = num_rows * num_cols
sample_index = np.random.randint(patches.shape[0], size=grid_cells)

# Fancy indexing returns a copy of the selected rows.
samples = patches[sample_index]
display_patches(samples, num_rows, num_cols, padding_size)

Step 0b: Zero mean the data
# Remove each patch's own mean intensity (one mean per row), so every patch
# is individually centred on zero.
patch_means = np.mean(patches, axis=1)
patches = (patches.T - patch_means).T
Step 1: Implement PCA
Step 1a: Implement PCA
# Covariance of the patch data (one patch per row).
cov = patches.T.dot(patches) / patches.shape[0]

# cov is symmetric positive semi-definite, so the columns of U are its
# eigenvectors and S holds the eigenvalues in descending order.
U, S, V = np.linalg.svd(cov)

# Rotate the patches into the PCA basis.  For row-sample data the projection
# is patches @ U; using U.T here would not diagonalise the covariance.
pRot = patches.dot(U)
Step 1b: Check covariance
# Covariance of the rotated patches, shown as an image for visual inspection.
n_rot = pRot.shape[0]
cov_rot = pRot.T.dot(pRot) / n_rot
plt.imshow(cov_rot)
plt.show()

Step 2: Find number of components to retain
# Fraction of the total variance captured by the first 1, 2, ... components.
retained = np.cumsum(S) / np.sum(S)

# Smallest component count whose retained fraction exceeds 99 %; stays 0 if
# no prefix crosses the threshold.
k = next((count for count, frac in enumerate(retained, 1) if frac > 0.99), 0)
Step 3: PCA with dimension reduction
# A much smaller component count, for visual comparison with k.
low_dim_k = 20

# Reconstruct and display the sampled patches twice: first with the k
# components chosen above, then with only low_dim_k components.
# display_patches opens its own figure, so no extra plt.figure() is needed.
for n_components in (k, low_dim_k):
    basis = U[:, :n_components]
    # Coordinates along the retained principal directions.
    pReduce = patches.dot(basis)
    # Back to pixel space; equivalent to zero-padding the dropped components
    # and multiplying by the full U.T.
    pHat = pReduce.dot(basis.T)
    samples = pHat[sample_index, :]
    display_patches(samples, num_rows, num_cols, padding_size)

<matplotlib.figure.Figure at 0x1252ccf7668>

Step 4: PCA with whitening and regularization
Step 4a: Implement PCA with whitening and regularization
Step 4b: Check covariance
# Patches expressed in the eigenbasis (patches @ U for row-sample data).
rotated = patches.dot(U)

# Whiten twice: with a negligible regulariser, then with epsilon = 0.1, and
# show the resulting covariance each time.  After the loop, epsilon and
# pPCAwhite hold the regularised (0.1) version.
for epsilon in (1e-9, 0.1):
    # S is a 1-D vector of eigenvalues, so the per-component scale factors
    # broadcast over the columns without building a diagonal matrix.
    pPCAwhite = rotated / np.sqrt(S + epsilon)
    cov_PCAwhite = pPCAwhite.T.dot(pPCAwhite) / pPCAwhite.shape[0]
    plt.imshow(cov_PCAwhite)
    plt.show()

Step 5: ZCA whitening
# ZCA whitening: PCA-whiten, then rotate back into pixel space.  For
# row-sample data that is patches @ U @ D @ U.T with
# D = diag(1 / sqrt(S + epsilon)); the final factor must be U.T, not U.
# Uses whatever value of epsilon is currently in scope.
pZCAwhite = (patches.dot(U) / np.sqrt(S + epsilon)).dot(U.T)

# Whitened patches ...
samples = pZCAwhite[sample_index, :]
display_patches(samples, num_rows, num_cols, padding_size)

# ... next to the same patches before whitening.  display_patches opens its
# own figure, so no separate plt.figure() call is required.
samples = patches[sample_index, :]
display_patches(samples, num_rows, num_cols, padding_size)

<matplotlib.figure.Figure at 0x1252c8d62b0>
