from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
# Keep only the feature columns; df is assumed to hold the samples plus a "target" column
samples = df.drop("target", axis=1).values
# Fit a 2-component PCA model on the feature matrix
model = PCA(n_components=2)
model.fit(samples)
# Get the mean of the samples: mean
mean = model.mean_
# Get the first principal component: first_pc
first_pc = model.components_[0,:]
# Visualize direction of the component
plt.arrow(mean[0] , mean[1], first_pc[0], first_pc[1], color='red', width=0.01)
plt.axis('equal')
plt.show()
transformed = model.transform(samples)
# Principal Components
principal_components = model.components_
# Visualize the variance explained by each principal component
components = range(model.n_components_)
plt.bar(components, model.explained_variance_)
plt.xticks(components)
plt.show()
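# Optional sketch (not part of the original steps): the explained variance ratio makes it easier
# to decide how many components to keep. This assumes a separate PCA fitted with the default
# n_components=None so that every component is available; full_model is a hypothetical name.
full_model = PCA()
full_model.fit(samples)
cumulative = np.cumsum(full_model.explained_variance_ratio_)
plt.plot(range(1, len(cumulative) + 1), cumulative, marker='o')
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance ratio')
plt.show()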
# Visualize how the dataset looks after transformation
xs = transformed[:,0]
ys = transformed[:,1]
plt.scatter(xs, ys, c=df["target"].values)
plt.show()
# Contribution of original features to each principal component (loadings)
feature_names = df.drop("target", axis=1).columns
for i, pc in enumerate(principal_components):
    plt.bar(feature_names, np.abs(pc), label=f'PC {i + 1}', alpha=0.7)
plt.xlabel('Original Features')
plt.ylabel('Absolute Loadings')
plt.legend()
plt.show()
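# Optional sketch: loadings and explained variance depend on feature scale, so standardizing
# before PCA is common. This assumes sklearn's StandardScaler and make_pipeline; it is a sketch,
# not part of the original steps.
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
scaled_pca = make_pipeline(StandardScaler(), PCA(n_components=2))
scaled_transformed = scaled_pca.fit_transform(samples)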
#### TruncatedSVD: PCA for sparse data (most entries are zero; sparse formats save memory by storing only the non-zero entries and their positions)
from sklearn.decomposition import TruncatedSVD
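# Optional sketch: building a scipy.sparse.csr_matrix explicitly, to show the "only non-zero
# entries are stored" idea from the heading. csr_matrix here is an assumption about how the
# sparse input would be constructed; sparse_samples is a hypothetical name.
from scipy.sparse import csr_matrix
sparse_samples = csr_matrix(samples)  # stores only the non-zero values plus their positions
print(sparse_samples.nnz, "non-zero entries out of", sparse_samples.shape[0] * sparse_samples.shape[1])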
# Apply TruncatedSVD
svd_model = TruncatedSVD(n_components=2)
transformed_svd = svd_model.fit_transform(samples)  # accepts a scipy.sparse.csr_matrix as well as a dense array
# Visualize how the dataset looks after transformation
xs_svd = transformed_svd[:, 0]
ys_svd = transformed_svd[:, 1]
plt.scatter(xs_svd, ys_svd)
plt.show()
# Principal components found by TruncatedSVD
svd_components = svd_model.components_
# Visualize the fraction of variance explained by each component
plt.bar(range(1, svd_model.n_components + 1), svd_model.explained_variance_ratio_)
plt.show()
# Contribution of original features to each TruncatedSVD component (loadings)
for i, loading in enumerate(svd_components):
    plt.bar(feature_names, np.abs(loading), label=f'PC {i + 1}', alpha=0.7)
plt.legend()
plt.show()
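# Optional usage sketch: a typical sparse use case for TruncatedSVD is latent semantic analysis
# on TF-IDF text features. `documents` is a hypothetical list of strings, not from the original code.
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(documents)  # sparse csr_matrix, mostly zeros
lsa = TruncatedSVD(n_components=2).fit_transform(tfidf)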