import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (importing mplot3d also makes the '3d' projection available)
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
# Location of the input CSV passed to the analysis functions by main().
data_path = r'E:\研一上\数据挖掘\dataset\Case2-clustering\ALS.csv'

# Small constant used by preprocess() during normalization.
eps = 1e-8
def preprocess(data_path):
    """Load the CSV at *data_path* and z-score normalize its feature columns.

    The first column of the file is dropped (presumably an ID/index column --
    confirm against the dataset); every remaining column is treated as a
    numeric feature.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A ``(data_norm, data)`` tuple of DataFrames: ``data_norm`` holds the
        standardized features, ``data`` the untouched features.
    """
    df = pd.read_csv(data_path)
    data = df.iloc[:, 1:]

    # eps belongs in the denominator: it guards against division by zero for
    # constant columns (std == 0). Adding it to the quotient instead would
    # leave 0/0 = NaN for such columns and shift every other value by eps.
    # ddof=0 keeps the population standard deviation that np.std computes.
    data_norm = (data - data.mean()) / (data.std(ddof=0) + eps)
    return data_norm, data
def myplot(data_path):
    """Show histograms of the first eight normalized features in a 2x4 grid.

    Args:
        data_path: Path of the CSV file handed to ``preprocess``.
    """
    normalized, _ = preprocess(data_path)
    column_names = normalized.columns
    for idx in range(8):
        # Subplot positions are 1-based, column indices 0-based.
        plt.subplot(2, 4, idx + 1)
        values = normalized.iloc[:, idx].values
        plt.hist(values, color='m')
        plt.title(column_names[idx])
    plt.show()
def definek(data_path):
    """Plot mean nearest-centroid distance against the number of clusters.

    Fits a KMeans model for every cluster count from 1 to 20 and plots, for
    each count, the average Euclidean distance between a sample and its
    closest cluster center, so a suitable k can be picked visually.

    Args:
        data_path: Path of the CSV file handed to ``preprocess``.
    """
    # The x-axis must carry the real cluster counts (1..20), not the
    # zero-based loop index, otherwise the curve is shifted by one.
    cluster_counts = range(1, 21)

    _, data = preprocess(data_path)
    # NOTE(review): despite its name this array holds the RAW features, not
    # the normalized ones returned by preprocess(). Left unchanged here --
    # confirm whether clustering on `data_norm` was intended.
    data_norm_values = data.values.astype('float64')

    distance = []
    for k in cluster_counts:
        model = KMeans(n_clusters=k, random_state=10)
        model.fit(data_norm_values)
        # Distance from every sample to its nearest center, averaged.
        nearest = cdist(data_norm_values, model.cluster_centers_).min(axis=1)
        distance.append(nearest.mean())

    plt.plot(cluster_counts, distance)
    plt.title('Distance with different clusters')
    plt.xlabel('clusters')
    plt.ylabel('Distance')
    plt.show()
def vis(data_path):
    """Cluster the data into 5 groups, save the labels, and plot a 2-D t-SNE map.

    Side effects: writes ``./result.csv`` (features plus a ``label`` column)
    and opens a Matplotlib window with one scatter series per cluster.

    Args:
        data_path: Path of the CSV file handed to ``preprocess``.
    """
    _, data = preprocess(data_path)
    # NOTE(review): despite its name this array holds the RAW features, not
    # the normalized ones returned by preprocess(). Left unchanged here --
    # confirm whether clustering on `data_norm` was intended.
    data_norm_values = data.values.astype('float64')

    model = KMeans(n_clusters=5, random_state=10)
    model.fit(data_norm_values)
    # Column vector so the labels can be concatenated next to the features.
    label = model.predict(data_norm_values).reshape(-1, 1)

    column = list(data.columns) + ['label']
    result = np.concatenate((data_norm_values, label), axis=1)
    pd.DataFrame(result, columns=column).to_csv('./result.csv')

    # `n_iter` is intentionally not passed: 1000 iterations is the default,
    # and the keyword was renamed in newer scikit-learn releases, so omitting
    # it keeps the call valid across versions.
    tsne = TSNE(
        n_components=2,
        perplexity=30.0,
        early_exaggeration=12.0,
        learning_rate=200.0,
        n_iter_without_progress=300,
        min_grad_norm=1e-7,
        metric="euclidean",
        init="random",
        verbose=0,
        random_state=None,
        method='barnes_hut',
        angle=0.5,
    )
    # Only the samples are embedded. The previous extra embedding of the 5
    # cluster centers was never used and cannot run with perplexity=30 on
    # scikit-learn versions that require perplexity < n_samples.
    low_dims_data = tsne.fit_transform(data_norm_values)

    data_vis = np.concatenate((low_dims_data, label), axis=1)
    data_df = pd.DataFrame(data_vis, columns=['x', 'y', 'label'])

    # One scatter series per cluster id, drawn in id order with a fixed color.
    for cluster_id, color in enumerate(('g', 'r', 'b', 'y', 'm')):
        points = data_df[data_df['label'] == cluster_id].values
        plt.scatter(points[:, 0], points[:, 1], c=color)
    plt.show()
def vis_triples(data_path):
    """Cluster the data into 5 groups and plot a 3-D t-SNE embedding.

    Opens a Matplotlib window with a 3-D scatter plot, one color per cluster.

    Args:
        data_path: Path of the CSV file handed to ``preprocess``.
    """
    _, data = preprocess(data_path)
    # NOTE(review): despite its name this array holds the RAW features, not
    # the normalized ones returned by preprocess(). Left unchanged here --
    # confirm whether clustering on `data_norm` was intended.
    data_norm_values = data.values.astype('float64')

    model = KMeans(n_clusters=5, random_state=10)
    model.fit(data_norm_values)
    # Column vector so the labels can be concatenated next to the embedding.
    label = model.predict(data_norm_values).reshape(-1, 1)

    # `n_iter` is intentionally not passed: 1000 iterations is the default,
    # and the keyword was renamed in newer scikit-learn releases, so omitting
    # it keeps the call valid across versions.
    tsne = TSNE(
        n_components=3,
        perplexity=30.0,
        early_exaggeration=12.0,
        learning_rate=200.0,
        n_iter_without_progress=300,
        min_grad_norm=1e-7,
        metric="euclidean",
        init="random",
        verbose=0,
        random_state=None,
        method='barnes_hut',
        angle=0.5,
    )
    low_dims_data = tsne.fit_transform(data_norm_values)

    data_vis = np.concatenate((low_dims_data, label), axis=1)
    data_df = pd.DataFrame(data_vis, columns=['x', 'y', 'z', 'label'])

    fig = plt.figure()
    # Create the 3-D axes through the figure: constructing Axes3D(fig)
    # directly does not attach the axes to the figure on current Matplotlib
    # releases, which leaves the window empty.
    ax = fig.add_subplot(111, projection='3d')

    # One scatter series per cluster id, drawn in id order with a fixed color.
    for cluster_id, color in enumerate(('g', 'r', 'b', 'y', 'm')):
        points = data_df[data_df['label'] == cluster_id].values
        ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=color)

    ax.set_xlabel('feature1')
    ax.set_ylabel('feature2')
    ax.set_zlabel('feature3')
    plt.show()
def main():
    """Run the 3-D cluster visualization on the configured data file."""
    vis_triples(data_path)
# Only run the analysis when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# When reposting, please credit the original source: https://mac.8miu.com/read-509737.html