{ "cells": [ { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from sklearn.cluster import KMeans\n", "from sklearn.mixture import GaussianMixture\n", "from sklearn.decomposition import PCA\n", "import matplotlib.cm as cm\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "raw_data = np.loadtxt('EEG_feature.txt')\n", "# print(raw_data.shape)\n", "subject_video_label = np.loadtxt('subject_video.txt')\n", "#print(subject_video_label[:,0])\n", "valence_arousal_label = np.loadtxt('valence_arousal_label.txt')" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.662445447924595\n" ] } ], "source": [ "pca = PCA(n_components=2)\n", "out = pca.fit_transform(raw_data)\n", "# print(out.shape)\n", "# print(pca.explained_variance_ratio_)\n", "print(sum(pca.explained_variance_ratio_))" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "class cluster_eval(object):\n", " def _dist(self, x, y):\n", " return np.sum((x - y) ** 2) ** 0.5\n", " def __init__(self, raw_data):\n", " self.raw_data = raw_data\n", " self.dis = [[self._dist(raw_data[i], raw_data[j]) for j in range(i)] for i in range(len(raw_data))]\n", " def eval(self, label, centers, clusters):\n", " self.clu_label = [[] for i in range(clusters)]\n", " for i in range(len(label)):\n", " self.clu_label[label[i]].append(i)\n", " centers = [np.mean(self.raw_data[self.clu_label[i]], axis=0) for i in range(clusters)]\n", " # avg, diam\n", " self.clu_avg = [0 for i in range(clusters)]\n", " self.diam = [0 for i in range(clusters)]\n", " for i in range(clusters):\n", " clu_num = len(self.clu_label[i])\n", " for j in range(clu_num):\n", " for k in range(j):\n", " jk_dis = self.dis[self.clu_label[i][j]][self.clu_label[i][k]]\n", " self.clu_avg[i] += jk_dis\n", " self.diam[i] = max(self.diam[i], jk_dis)\n", " self.clu_avg[i] *= 2 / (clu_num * (clu_num - 1))\n", " # dcen\n", " self.cen = [[self._dist(centers[i], centers[j]) for j in range(i)] for i in range(clusters)]\n", " # dmin\n", " self.dmin = [[j for j in i] for i in self.cen]\n", " for i in range(clusters):\n", " for j in range(i):\n", " for k in self.clu_label[i]:\n", " for l in self.clu_label[j]:\n", " self.dmin[i][j] = min(self.dmin[i][j], self.dis[max(k, l)][min(k, l)])\n", " # DBI\n", " self.DBI = sum(max(self.cen[max(i, j)][min(i, j)] for j in list(range(i)) + list(range(i + 1, clusters))) for i in range(clusters)) / clusters\n", " # print(self.DBI)\n", " # Dunn\n", " self.Dunn = min(min(i) for i in self.dmin[1:]) / max(self.diam)\n", " # print(self.Dunn)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "for n_clusters in [2]:#, 3, 4, 5, 6, 8, 10, 32, 38]:\n", " kmeans = KMeans(n_clusters=n_clusters, random_state=0)\n", " kmeansout = kmeans.fit(raw_data)\n", " e = cluster_eval(raw_data)\n", " e.eval(kmeansout.labels_, kmeansout.cluster_centers_, n_clusters)\n", " # print(n_clusters, e.DBI, e.Dunn)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2 11.96415880057959 0.06445432577653755\n", "3 13.050998228978061 0.06836927195715756\n", "4 14.56934977532082 0.07353152299031981\n", "5 22.827039192118047 0.06599536810054239\n", "6 22.53352523127153 0.07698826038993817\n", "8 23.286835603050353 0.10264918722812263\n", "10 23.316332832325283 0.10219677382317006\n", "32 23.748717193911787 0.12387311489577149\n" ] } ], "source": [ "for n_clusters in [2, 3, 4, 5, 6, 8, 10, 32]:\n", " kmeans = KMeans(n_clusters=n_clusters, random_state=0, init='random')\n", " kmeansout = kmeans.fit(raw_data)\n", " e = cluster_eval(raw_data)\n", " e.eval(kmeansout.labels_, kmeansout.cluster_centers_, n_clusters)\n", " print(n_clusters, e.DBI, e.Dunn)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "full 22.47773107988422 0.09831820651369301\n", "tied 23.670231282669295 0.14666086436496323\n", "diag 23.230354582341718 0.07905186911790175\n", "spherical 23.505452324345203 0.08163001651971362\n" ] } ], "source": [ "for covariance_type in ['full', 'tied', 'diag', 'spherical']:\n", " n_clusters = 6\n", " gm = GaussianMixture(n_components=n_clusters, covariance_type=covariance_type)\n", " gmout = gm.fit_predict(raw_data)\n", " e = cluster_eval(raw_data)\n", " e.eval(gmout, None, n_clusters)\n", " print(covariance_type, e.DBI, e.Dunn)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "def jaccard(clu_label, label):\n", " n = len(clu_label)\n", " a = 0\n", " bc = 0\n", " for i in range(n):\n", " for j in range(i):\n", " if clu_label[i] == clu_label[j] and label[i] == label[j]:\n", " a += 1\n", " if clu_label[i] != clu_label[j] and label[i] == label[j]:\n", " bc += 1\n", " if clu_label[i] == clu_label[j] and label[i] == label[j]:\n", " bc += 1\n", " return a / (a + bc)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.4533038469950667\n", "0.011226523974399329\n" ] } ], "source": [ "kmeans = KMeans(n_clusters=32, random_state=0)\n", "kmeansout = kmeans.fit(raw_data).labels_\n", "subject_label = subject_video_label[:, 0]\n", "video_label = subject_video_label[:, 1]\n", "print(jaccard(kmeansout, subject_label))\n", "print(jaccard(kmeansout, video_label))" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.title('pca result')\n", "plt.scatter(out[..., 0], out[..., 1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 2 }