#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Helpers to load ARFF datasets and to run/evaluate scikit-learn clusterings.

Created on Fri Dec 3 16:29:12 2021

@author: pfaure
"""

from scipy.io import arff
import numpy as np
import time

from sklearn import cluster, metrics, preprocessing


def _extract_leading_columns(data_path: str, n_columns: int) -> np.ndarray:
    """Load ``data_path + ".arff"`` and keep the first *n_columns* attributes.

    Args:
        data_path: Path of the dataset WITHOUT the ``.arff`` extension.
        n_columns: Number of leading attributes to keep for every record.

    Returns:
        An array built from one ``n_columns``-long row per ARFF record.
    """
    # The context manager guarantees the file handle is released even when
    # parsing fails; the records are fully loaded before the file is closed.
    with open(data_path + ".arff", 'r') as arff_file:
        records, _meta = arff.loadarff(arff_file)
    return np.array(
        [[record[i] for i in range(n_columns)] for record in records])


def extract_data_2d(data_path: str) -> np.ndarray:
    """Return the first two attributes of every record of an ARFF file.

    Args:
        data_path: Path of the dataset WITHOUT the ``.arff`` extension.
    """
    return _extract_leading_columns(data_path, 2)


def extract_data_3d(data_path: str) -> np.ndarray:
    """Return the first three attributes of every record of an ARFF file.

    Args:
        data_path: Path of the dataset WITHOUT the ``.arff`` extension.
    """
    return _extract_leading_columns(data_path, 3)


def scale_data(data):
    """Standardize *data* with a freshly fitted ``StandardScaler``.

    Returns:
        The result of ``StandardScaler().fit_transform(data)``.
    """
    return preprocessing.StandardScaler().fit_transform(data)


def apply_kmeans(data, k: int = 3, init="k-means++"):
    """Fit a k-means model on *data* and time the fit.

    Args:
        data: Samples to cluster.
        k: Number of clusters.
        init: Initialisation method, forwarded to ``cluster.KMeans``.

    Returns:
        A ``(model, elapsed_ms)`` tuple: the fitted ``KMeans`` estimator and
        the elapsed time in milliseconds, rounded to two decimals. The timed
        span covers both the construction and the fit of the model.
    """
    # perf_counter() is a monotonic, high-resolution clock: unlike time.time()
    # it cannot jump when the system clock is adjusted during the measurement.
    start = time.perf_counter()
    model_km = cluster.KMeans(n_clusters=k, init=init)
    model_km.fit(data)
    elapsed = time.perf_counter() - start
    return (model_km, round(elapsed * 1000, 2))


def apply_agglomerative_clustering(data, k: int = 3, linkage="complete"):
    """Fit an agglomerative clustering model on *data* and time the fit.

    Args:
        data: Samples to cluster.
        k: Number of clusters.
        linkage: Linkage criterion, forwarded to
            ``cluster.AgglomerativeClustering``.

    Returns:
        A ``(model, elapsed_ms)`` tuple: the fitted estimator and the elapsed
        time in milliseconds, rounded to two decimals. The timed span covers
        both the construction and the fit of the model.
    """
    start = time.perf_counter()
    # The distance is deliberately left to its default (Euclidean): the
    # ``affinity`` keyword was deprecated in scikit-learn 1.2 and removed in
    # 1.4 in favour of ``metric``, so passing either name explicitly breaks
    # on some releases while the default works on all of them.
    model_agg = cluster.AgglomerativeClustering(n_clusters=k, linkage=linkage)
    model_agg.fit(data)
    elapsed = time.perf_counter() - start
    return (model_agg, round(elapsed * 1000, 2))


def evaluate_kmeans(data, model_km):
    """Evaluate a fitted k-means model.

    Args:
        data: Samples the silhouette score is computed on, together with
            ``model_km.labels_``.
        model_km: Fitted model exposing ``labels_``, ``inertia_`` and
            ``n_iter_``.

    Returns:
        A ``(silhouette, inertia, n_iter)`` tuple, the silhouette score being
        computed with the Euclidean metric.
    """
    silh = metrics.silhouette_score(data, model_km.labels_, metric='euclidean')
    return (silh, model_km.inertia_, model_km.n_iter_)


def evaluate_agglomerative_clustering(data, model_agg):
    """Return the Euclidean silhouette score of a fitted agglomerative model.

    Args:
        data: Samples the silhouette score is computed on, together with
            ``model_agg.labels_``.
        model_agg: Fitted model exposing ``labels_``.
    """
    return metrics.silhouette_score(
        data, model_agg.labels_, metric='euclidean')