# Files
# ieemoo-ai-imageassessment/dealdata.py
# 2024-11-27 15:37:10 +08:00
#
# 161 lines
# 5.6 KiB
# Python

import pickle
import os
os.environ["OMP_NUM_THREADS"] = '1'
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import shutil
import math
# Per-sample |cosine similarity| values accumulated by get_keams() for the
# clustering criterion currently being evaluated; main() appends this list to
# all_distance_lists and then rebinds it to a fresh list.
distance_lists = []
# One list of similarities per criterion, in the order main() evaluates them;
# passed to showHistogram() at the end of main().
all_distance_lists = []
def showImg(newx):
    """Show a scatter plot of the first two columns of ``newx``.

    ``newx`` is sliced as ``newx[:, 0]`` and ``newx[:, 1]``, so it must support
    2-D indexing and have at least two columns.
    """
    x_coords = newx[:, 0]
    y_coords = newx[:, 1]
    plt.scatter(x_coords, y_coords)
    plt.show()
def showHistogram(data):
    """Draw four histograms on a 2x2 grid, save the figure, then display it.

    ``data[0]`` .. ``data[3]`` are plotted in the panels titled 'silhouette',
    'calinski', 'davies' and 'inertia' respectively, all sharing the same bin
    edges (width 0.1, starting at 0). The figure is written to
    'multiple_histograms_in_ranges_non_pca.png' in the current directory.
    """
    panel_titles = ('silhouette', 'calinski', 'davies', 'inertia')
    fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))
    bins = np.arange(0, 1.5, 0.1)
    # axs.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
    for idx, (ax, title) in enumerate(zip(axs.flat, panel_titles)):
        ax.hist(data[idx], bins=bins, edgecolor='black')
        ax.set_title(title)
        ax.set_xlabel('Similarity')
        ax.set_ylabel('Frequency')
        # The legend text is the Chinese word for "frequency".
        ax.legend(labels=['频次'])
    # Render the figure: tidy the layout, persist it, then show it.
    plt.tight_layout()
    plt.savefig('multiple_histograms_in_ranges_non_pca.png')
    plt.show()
def get_pca_data(data, n_components=16):
    """Reduce ``data`` with PCA and return the transformed samples.

    Args:
        data: Sample matrix passed straight to ``PCA.fit_transform``
            (presumably one row per sample -- confirm against the caller).
        n_components: Number of principal components to keep. Defaults to 16,
            the value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        Whatever ``PCA.fit_transform`` returns for ``data``.
    """
    pca = PCA(n_components=n_components)
    return pca.fit_transform(data)
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Args:
        vec1: 1-D sequence or array of numbers.
        vec2: 1-D sequence or array with the same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``. When either vector has
        zero norm the cosine is undefined; 0.0 is returned instead of the NaN
        (and RuntimeWarning) a 0/0 division would produce.
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # A zero vector has no direction; treat it as "not similar".
        return 0.0
    return vec1.dot(vec2) / norm_product
def get_kclusters(data, k, dicts, standard='silhouette'):
    """Cluster ``data`` into ``k`` groups and record the result under its score.

    Args:
        data: Sample matrix handed to ``KMeans.fit_predict``.
        k: Number of clusters to fit.
        dicts: Mapping that is updated in place; the computed score becomes the
            key and ``[labels, cluster_centers]`` the value. Two runs that
            yield exactly the same score overwrite each other.
        standard: Which quality measure to use as the key: 'silhouette',
            'calinski', 'davies' or 'inertia'.

    Returns:
        The same ``dicts`` object, for call chaining.

    Raises:
        ValueError: If ``standard`` is not one of the four supported names.
            (Previously such a value was ignored after a full K-means fit and
            ``dicts`` came back unchanged.)
    """
    if standard not in ('silhouette', 'calinski', 'davies', 'inertia'):
        # Fail before paying for a K-means fit whose result would be dropped.
        raise ValueError('unsupported clustering standard: {!r}'.format(standard))

    model = KMeans(n_clusters=k, n_init='auto')
    y_pred = model.fit_predict(data)

    if standard == 'silhouette':
        score = silhouette_score(data, y_pred, metric='euclidean')
    elif standard == 'calinski':
        score = calinski_harabasz_score(data, y_pred)
    elif standard == 'davies':
        score = davies_bouldin_score(data, y_pred)
    else:  # 'inertia'
        score = model.inertia_

    dicts[score] = [y_pred, model.cluster_centers_]
    return dicts
def get_keams(data, standard='silhouette'):
    """Pick the best K-means clustering of ``data`` and collect similarities.

    K-means is run for every ``k`` from 2 up to ``min(9, n_samples - 1)``; the
    run with the best score under ``standard`` is kept (highest for
    'silhouette' / 'calinski', lowest for 'davies' / 'inertia'). For each
    sample, the absolute cosine similarity to its assigned cluster centre is
    appended to the module-level ``distance_lists``.

    The upper bound on ``k`` used to be a fixed 9, which made any group with
    fewer than 10 samples fail inside scikit-learn and contribute nothing.

    NOTE(review): K-means inertia generally keeps shrinking as ``k`` grows, so
    the 'inertia' standard will usually select the largest ``k`` tried --
    confirm that this is intended.

    Args:
        data: Indexable sample matrix (one row per sample) supporting ``len``.
        standard: 'silhouette', 'calinski', 'davies' or 'inertia'.

    Returns:
        The module-level ``distance_lists`` after the new values were added.

    Raises:
        ValueError: If ``standard`` is unknown, or ``data`` has fewer than
            three samples (no valid ``k`` exists).
    """
    global distance_lists

    if standard in ('silhouette', 'calinski'):
        pick_best = max  # higher score means better clustering
    elif standard in ('davies', 'inertia'):
        pick_best = min  # lower score means better clustering
    else:
        raise ValueError('unsupported clustering standard: {!r}'.format(standard))

    # The label-based scores need 2 <= k <= n_samples - 1.
    largest_k = min(9, len(data) - 1)
    if largest_k < 2:
        raise ValueError('need at least 3 samples to cluster, got {}'.format(len(data)))

    dicts = {}
    for k in range(2, largest_k + 1):
        dicts = get_kclusters(data, k, dicts, standard)

    best_score = pick_best(dicts)
    assignments, centers = dicts[best_score]
    distance_list = [
        abs(cosine_similarity(centers[cluster], data[row]))
        for row, cluster in enumerate(assignments)
    ]
    distance_lists += distance_list
    return distance_lists
def move_file(labels, y_pred):
    """Sort files into per-cluster sub-directories under ``data/<prefix>/``.

    For every ``(label, y_label)`` pair, ``<prefix>`` is the part of ``label``
    before its first underscore, and the file ``data/<prefix>/<label>`` is
    moved to ``data/<prefix>/<y_label>/<label>``. The cluster directory is
    created when missing.

    Failures are best-effort: any OS-level error for one file (missing parent
    directory, missing source file, ...) is printed and the remaining files
    are still processed. Previously only the move itself was protected, so a
    failing ``os.mkdir`` aborted the whole batch.

    Args:
        labels: File names of the form ``<prefix>_...``.
        y_pred: Cluster ids, parallel to ``labels``.
    """
    for label, y_label in zip(labels, y_pred):
        print('label >>> {}'.format(label))
        class_dir = os.sep.join(['data', label.split('_')[0]])
        cluster_dir = os.sep.join([class_dir, str(y_label)])
        try:
            if not os.path.isdir(cluster_dir):
                os.mkdir(cluster_dir)
            shutil.move(os.sep.join([class_dir, label]),
                        os.sep.join([cluster_dir, label]))
        except OSError as e:
            print(e)
def move_video(root_pth='data_test'):
    """Data preparation: flatten ``.jpg`` files up into each top-level folder.

    For every entry ``<root_pth>/<name>``, all ``.jpg`` files found in any
    sub-directory below it are moved directly into ``<root_pth>/<name>``.
    Other files and the (now possibly empty) sub-directories are left alone.
    Entries of ``root_pth`` that are not directories are skipped, because
    ``os.walk`` yields nothing for them.

    The source directory is taken from the ``root`` reported by ``os.walk``.
    It used to be rebuilt as ``<root_pth>/<name>/<dir>``, which does not exist
    for directories nested more than one level deep and raised
    ``FileNotFoundError``.

    Args:
        root_pth: Directory whose immediate children are flattened.
    """
    for entry in os.listdir(root_pth):
        top_dir = os.path.join(root_pth, entry)
        for root, _dirs, files in os.walk(top_dir):
            if root == top_dir:
                # Files already at the top level stay where they are.
                continue
            for file in files:
                if file.endswith('.jpg'):
                    shutil.move(os.path.join(root, file),
                                os.path.join(top_dir, file))
def main():
    """Run the four clustering criteria over every group and plot the results.

    'data_test/data.pkl' is expected to hold a mapping whose values are
    iterables of dicts; the values of those dicts are stacked row-wise into
    one sample matrix per group (the dict keys are not used here). For each
    criterion, ``get_keams`` is called on every group, the similarities it
    accumulated in the module-level ``distance_lists`` are appended to
    ``all_distance_lists``, and ``distance_lists`` is reset. Finally the four
    lists are passed to ``showHistogram``.

    A group that cannot be clustered is reported with ``print`` and skipped.
    """
    global distance_lists

    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only run this against a data.pkl from a trusted source.
    # The file is read once up front instead of once per criterion.
    with open('data_test/data.pkl', 'rb') as f:
        data = pickle.load(f)

    for standard in ['silhouette', 'calinski', 'davies', 'inertia']:
        for values in data.values():
            vectors = []
            for value in values:
                vectors.extend(value.values())
            # Stack once rather than re-copying the matrix for every vector.
            # An empty group yields None, which get_keams rejects below.
            temp = np.vstack(vectors) if vectors else None
            try:
                get_keams(temp, standard)
            except Exception as e:
                # Best-effort: report the failing group and keep going.
                print(e)
        all_distance_lists.append(distance_lists)
        distance_lists = []
    showHistogram(all_distance_lists)
if __name__ == '__main__':
    # Script entry point: run the clustering-criterion histogram experiment.
    main()
    # Alternative entry point (disabled): flatten images back out of the
    # per-cluster folders.
    # move_video()