In this week's seminar we demonstrate applications of dimensionality reduction in unsupervised learning. We will mostly follow the demonstrations below to embed the 8x8 pixel images of scikit-learn's handwritten digits dataset into the 2-dimensional plane.
from time import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn.decomposition import SparsePCA
from sklearn import manifold, datasets, decomposition, random_projection
digits = datasets.load_digits(n_class=9)
X = digits.data
y = digits.target
n_samples, n_features = X.shape
n_neighbors = 30
Plot images of the digits
n_img_per_row = 20
img = np.zeros((10 * n_img_per_row, 10 * n_img_per_row))
for i in range(n_img_per_row):
    ix = 10 * i + 1
    for j in range(n_img_per_row):
        iy = 10 * j + 1
        img[ix : ix + 8, iy : iy + 8] = X[i * n_img_per_row + j].reshape((8, 8))
plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor="w", edgecolor="k")
plt.imshow(img, cmap=plt.cm.binary)
plt.xticks([])
plt.yticks([])
plt.title("A selection from the 64-dimensional digits dataset")
We begin with the simplest embedding possible: we just randomly project the high-dimensional data onto the 2-dimensional plane. The embedding is already quite good. Why do random projections already work so well? This is due to the Johnson-Lindenstrauss lemma that we will cover in class.
print("Computing random projection")
rp = random_projection.GaussianRandomProjection(n_components=2, random_state=42)
X_projected = rp.fit_transform(X)
Computing random projection
Scale and visualize the embedding vectors
def plot_embedding(X, title=None):
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)
    plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor="w", edgecolor="k")
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(
            X[i, 0],
            X[i, 1],
            str(y[i]),
            color=plt.cm.Dark2(y[i]),
            fontdict={"weight": "bold", "size": 9},
        )
    if hasattr(offsetbox, "AnnotationBbox"):
        # only print thumbnails with matplotlib > 1.0
        shown_images = np.array([[1.0, 1.0]])  # just something big
        for i in range(X.shape[0]):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 4e-3:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r), X[i]
            )
            ax.add_artist(imagebox)
    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)
plot_embedding(X_projected, "Random Gaussian Projection of the digits")
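The Johnson-Lindenstrauss lemma mentioned above quantifies how many random dimensions are needed to preserve pairwise distances up to a distortion eps. The following is a small sketch, not part of the original demonstration, that compares the dimension the lemma prescribes for a 10% distortion with the distortion we actually incur at 2 components; since GaussianRandomProjection already scales its components so that squared norms are preserved in expectation, we can compare the raw distance ratios directly.

# Sketch: JL bound vs. the actual distance distortion of our 2-D projection.
from sklearn.random_projection import johnson_lindenstrauss_min_dim
from sklearn.metrics import pairwise_distances

print("Dimensions prescribed by the JL lemma for eps=0.1:",
      johnson_lindenstrauss_min_dim(n_samples, eps=0.1))

dist_original = pairwise_distances(X)
dist_projected = pairwise_distances(X_projected)
mask = dist_original > 0  # ignore self-distances (and exact duplicates)
ratios = dist_projected[mask] / dist_original[mask]
# With only 2 components the ratios are spread widely; with the number of
# components prescribed by the lemma they would concentrate around 1.
print("Distance ratios after projection: min %.2f, median %.2f, max %.2f"
      % (ratios.min(), np.median(ratios), ratios.max()))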
Compared to the Gaussian projection above, a sparse random projection gives similar embedding quality while being more memory efficient and faster to compute.
print("Computing sparse random projection")
rp = random_projection.SparseRandomProjection(n_components=2, random_state=42)
X_projected = rp.fit_transform(X)
plot_embedding(X_projected, "Random Sparse Projection of the digits")
Computing sparse random projection
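To illustrate the memory claim, here is a hedged sketch (only comparing the number of stored projection-matrix entries, not actual runtime memory): the Gaussian projection matrix is dense, while the sparse projection stores only its non-zero entries.

# Sketch: dense vs. sparse projection matrix storage.
gaussian_rp = random_projection.GaussianRandomProjection(n_components=2, random_state=42).fit(X)
sparse_rp = random_projection.SparseRandomProjection(n_components=2, random_state=42).fit(X)
print("Dense entries stored:   ", gaussian_rp.components_.size)
print("Non-zero entries stored:", sparse_rp.components_.nnz)
print("Fraction of non-zeros in the sparse projection matrix: %.3f"
      % (sparse_rp.components_.nnz / (2 * n_features)))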
For the truncated singular value decomposition we keep the singular vectors corresponding to the 2 largest singular values of the SVD of the data $X$. Note that this is similar to PCA, but without centering the data.
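To see the connection, here is a quick sketch, not from the original notebook: applying TruncatedSVD to the centered data reproduces the PCA embedding up to the sign of each component (both solvers are randomized, so we check agreement via correlations rather than exact equality).

# Sketch: TruncatedSVD on centered data agrees with PCA up to a sign flip
# per component.
X_centered = X - X.mean(axis=0)
svd_scores = decomposition.TruncatedSVD(n_components=2).fit_transform(X_centered)
pca_scores = decomposition.PCA(n_components=2).fit_transform(X)
for k in range(2):
    corr = np.corrcoef(svd_scores[:, k], pca_scores[:, k])[0, 1]
    print("Component %d: |correlation| between the two embeddings = %.4f" % (k, abs(corr)))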
print("Computing SVD projection")
t0 = time()
transformer = decomposition.TruncatedSVD(n_components=2)
X_pca = transformer.fit_transform(X)
plot_embedding(
    X_pca, "Truncated SVD projection of the digits (time %.2fs)" % (time() - t0)
)
print(f"Ratio of zero entries: {np.mean(transformer.components_ == 0)}")
img = np.zeros((10 * 1, 10 * 2))
for i in range(1):
    ix = 10 * i + 1
    for j in range(2):
        iy = 10 * j + 1
        img[ix : ix + 8, iy : iy + 8] = transformer.components_[j].reshape((8, 8))
plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor="w", edgecolor="k")
plt.imshow(img, cmap=plt.cm.binary)
plt.xticks([])
plt.yticks([])
plt.title("Truncated SVD")
Computing SVD projection
Ratio of zero entries: 0.03125
print("Computing PCA projection")
t0 = time()
transformer = decomposition.PCA(n_components=2)
X_pca = transformer.fit_transform(X)
plot_embedding(
    X_pca, "Principal Components projection of the digits (time %.2fs)" % (time() - t0)
)
print(f"Ratio of zero entries: {np.mean(transformer.components_ == 0)}")
img = np.zeros((10 * 1, 10 * 2))
for i in range(1):
    ix = 10 * i + 1
    for j in range(2):
        iy = 10 * j + 1
        img[ix : ix + 8, iy : iy + 8] = (
            transformer.components_[j] * np.sqrt(transformer.singular_values_[j])
            + transformer.mean_
        ).reshape((8, 8))
plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor="w", edgecolor="k")
plt.imshow(img, cmap=plt.cm.binary)
plt.xticks([])
plt.yticks([])
plt.title("Principal Axes")
Computing PCA projection
Ratio of zero entries: 0.03125
Standard PCA does not take sparsity into account. The roughly 3% zero entries above are probably just due to the fact that most pixels at the edges of the images are blank in every sample. To compress the data further we use sparse PCA.
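We can check that claim directly. The following quick sketch reports, for each of the 64 pixel positions, the fraction of images in which that pixel is exactly zero; values close to 1 along the border mean those pixels are almost always blank.

# Sketch: fraction of samples in which each pixel is zero, as an 8x8 grid.
zero_fraction = (X == 0).mean(axis=0).reshape(8, 8)
print(np.round(zero_fraction, 2))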
t0 = time()
sparse_transformer = SparsePCA(n_components=2, random_state=0)
X_sparse_PCA = sparse_transformer.fit_transform(X)
plot_embedding(
    X_sparse_PCA,
    "Sparse Principal Components projection of the digits (time %.2fs)" % (time() - t0),
)
print(f"Ratio of zero entries: {np.mean(sparse_transformer.components_ == 0)}")
Ratio of zero entries: 0.203125
The default option for SparsePCA sets the sparsity weight $\alpha$ to 1. We can increase $\alpha$ to get even sparser solutions, although there is a trade-off with the embedding quality in the plane.
t0 = time()
sparser_transformer = SparsePCA(n_components=2, alpha=50, random_state=0)
X_sparse_PCA = sparser_transformer.fit_transform(X)
plot_embedding(
    X_sparse_PCA,
    "Sparse Principal Components projection of the digits (time %.2fs)" % (time() - t0),
)
print(f"Ratio of zero entries: {np.mean(sparser_transformer.components_ == 0)}")
img = np.zeros((10 * 1, 10 * 2))
for i in range(1):
    ix = 10 * i + 1
    for j in range(2):
        iy = 10 * j + 1
        img[ix : ix + 8, iy : iy + 8] = sparser_transformer.components_[j].reshape((8, 8))
plt.figure(num=None, figsize=(12, 8), dpi=80, facecolor="w", edgecolor="k")
plt.imshow(img, cmap=plt.cm.binary)
plt.xticks([])
plt.yticks([])
plt.title("Principal Axes")
Ratio of zero entries: 0.7578125
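Putting the two runs above together, here is a small optional sketch (not in the original notebook; runtimes and exact ratios may vary) that refits SparsePCA for a few values of $\alpha$ and reports the resulting fraction of zero loadings.

# Sketch: larger sparsity weights alpha give sparser principal axes.
for alpha in [1, 10, 50]:
    spca = SparsePCA(n_components=2, alpha=alpha, random_state=0)
    spca.fit(X)
    print("alpha = %2d -> ratio of zero entries: %.3f"
          % (alpha, np.mean(spca.components_ == 0)))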
Description from the scikit-learn documentation:

Multidimensional scaling (MDS) seeks a low-dimensional representation of the data in which the distances respect well the distances in the original high-dimensional space.

In general, MDS is a technique used for analyzing similarity or dissimilarity data. It attempts to model similarity or dissimilarity data as distances in a geometric space. The data can be ratings of similarity between objects, interaction frequencies of molecules, or trade indices between countries.

There exist two types of MDS algorithm: metric and non-metric. In scikit-learn, the class MDS implements both. In metric MDS, the input similarity matrix arises from a metric (and thus respects the triangle inequality); the distances between two output points are then set to be as close as possible to the similarity or dissimilarity data. In the non-metric version, the algorithm tries to preserve the order of the distances, and hence seeks a monotonic relationship between the distances in the embedded space and the similarities/dissimilarities.
print("Computing MDS embedding")
# normalized_stress is set explicitly to keep the raw stress value and
# silence a FutureWarning about the changing default.
clf = manifold.MDS(n_components=2, n_init=1, max_iter=100, normalized_stress=False)
t0 = time()
X_mds = clf.fit_transform(X)
print("Done. Stress: %f" % clf.stress_)
plot_embedding(X_mds, "MDS embedding of the digits (time %.2fs)" % (time() - t0))
Computing MDS embedding
Done. Stress: 358452640.238472
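The embedding above uses metric MDS (the default). As described earlier, non-metric MDS only tries to preserve the ordering of the dissimilarities. The following hedged sketch runs it on the same data for comparison; the parameters are chosen only to keep the runtime comparable to the metric run above, and the reported stress is the normalized "stress-1" value.

# Sketch: non-metric MDS preserves only the rank order of the pairwise
# dissimilarities, not their actual values.
print("Computing non-metric MDS embedding")
nmds = manifold.MDS(n_components=2, metric=False, n_init=1, max_iter=100,
                    normalized_stress=True)
t0 = time()
X_nmds = nmds.fit_transform(X)
print("Done. Normalized stress: %f" % nmds.stress_)
plot_embedding(X_nmds, "Non-metric MDS embedding of the digits (time %.2fs)" % (time() - t0))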
NMF (non-negative matrix factorization) aims to decompose the data $X$ into a product of low-rank matrices with non-negative entries, $X \approx WH$, such that $\|X - WH\|$ is small for some norm (the default in scikit-learn is the Frobenius norm).
print("Computing NMF embedding")
NMF_transformer = decomposition.NMF(
    n_components=2, alpha_W=1, alpha_H=1, max_iter=10000, init="random"
)
t0 = time()
X_nmf = NMF_transformer.fit_transform(X)
plot_embedding(X_nmf, "NMF embedding of the digits (time %.2fs)" % (time() - t0))
Computing NMF embedding
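Since the embedding returned by fit_transform plays the role of $W$ and the learned components play the role of $H$, we can evaluate $\|X - WH\|$ directly. A quick sketch:

# Sketch: Frobenius norm of the residual X - WH for the fitted NMF model.
W, H = X_nmf, NMF_transformer.components_
residual = np.linalg.norm(X - W @ H, "fro")
print("||X - WH||_F:            %.2f" % residual)
print("relative to ||X||_F:     %.3f" % (residual / np.linalg.norm(X, "fro")))
# The model's reconstruction_err_ attribute should roughly agree with the
# residual norm computed above.
print("reconstruction_err_:     %.2f" % NMF_transformer.reconstruction_err_)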