# 2. 生成一个高维数据集
X, y = make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=0, random_state=42)
# 3. 应用PCA降维到2维
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# 4. 查看降维结果
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)...
from sklearn.decomposition import PCA

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_clusters = kmeans.fit_predict(data)
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(data)
plt.figure(figsize=(8, 6))
scatter = plt.scatter(reduced_data[:, 0], reduced_data[:, ...
K-Means的惯性计算方式是:每个样本与最接近的集群中心点的均方距离的总和。

kmeans_per_k = [KMeans(n_clusters=k, random_state=42).fit(X) for k in range(1, 10)]
inertias = [model.inertia_ for model in kmeans_per_k]
plt.figure(figsize=(8, 3.5))
plt.plot(range(1, 10), inertias, "bo-")
plt.xlab...
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(X)
# 计算每个数据点的轮廓系数
score = silhouette_score(X, kmeans.labels_)
# 计算整个聚类的 Silhouette 统计量
silhouette_scores.append(score)
# 选择具有最大 Silhouette 统计量的 k 值
best_k = k_range[np.argmax(silhouette_score...
    random_state=42
)
pca_3d_object.fit(df)
df_pca_3d = pca_3d_object.transform(df)
df_pca_3d.columns = ["comp1", "comp2", "comp3"]
df_pca_3d["cluster"] = predict
return pca_3d_object, df_pca_3d

8.2 降维可视化

下面是基于2个主成分的可视化绘图函数:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

def elbow_method(data, max_k):
    sse = []
    for k in range(1, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(data)
        sse.append(kmeans.inertia_)
    plt.plot(range(1, max_k + ...
rcParams['axes.unicode_minus'] = False  # 显示负号
X, y = make_blobs(n_samples=5000, centers=4, cluster_std=2.5, n_features=2, random_state=42)
K = range(1, 10)
# 直接计算sse
sse_result = []
for k in K:
    kmeans = KMeans(n_clusters=k, random_state=666)
    kmeans.fit(X)
    sse_result....
rng = np.random.RandomState(13)
X_stretched = np.dot(X, rng.randn(2, 2))
gmm = GMM(n_components=4, covariance_type='full', random_state=42)
plot_gmm(gmm, X_stretched)

四、GMM模型的组件

下面考虑一个特殊的分布形式。如下图所示
X, y = make_blobs(n_samples=5000, centers=4, cluster_std=2.5, n_features=2, random_state=42)
K = range(1, 10)
# 直接计算sse
sse_result = []
for k in K:
    kmeans = KMeans(n_clusters=k, random_state=666)
    kmeans.fit(X)
    sse_result.append(sum(np.min(cdist(X, kmeans.cluster_centers_...
2 重要参数 init & random_state & n_init:初始质心怎么放好?

X
y
plus = KMeans(n_clusters=10).fit(X)
plus.n_iter_
random = KMeans(n_clusters=10, init="random", random_state=420).fit(X)