[Zad 3] ZALICZONE

2022-02-11 16:42:38 +01:00
parent 14779630f3
commit cb085dc5d5
9 changed files with 135 additions and 62 deletions
--- a/zad3/.vscode/launch.json
+++ b/zad3/.vscode/launch.json
@@ -0,0 +1,15 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "zad3.py",
            "console": "integratedTerminal"
        }
    ]
 }
--- a/zad3/bz_236713_ml_195642_zad3.pdf
+++ b/zad3/bz_236713_ml_195642_zad3.pdf
--- a/zad3/data1_errors.png
+++ b/zad3/data1_errors.png
--- a/zad3/data2_errors.png
+++ b/zad3/data2_errors.png
--- a/zad3/kmeans.py
+++ b/zad3/kmeans.py
@@ -50,14 +50,47 @@ def plot_kmeans(all_data, k, name_suffix):
    plt.show()
 def plot_kmeans_change(all_data, k, name_suffix, show=True):
    '''Plot the final clusters together with the path of every centroid.

    all_data is a sequence of (centroids, clusters) pairs (presumably one
    pair per k-means iteration - confirm against the caller). The points of
    the last pair's clusters are drawn as scatter plots, and for each
    centroid index its positions across all pairs are joined by a line with
    'X' markers. Colors come from utils.get_color(index / k).

    The figure is always saved under the name kmeans_change<name_suffix>;
    afterwards it is shown when show is true and closed otherwise.
    '''
    fig, ax = plt.subplots()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_title(f'k={k}')
    plt.grid(True)

    # Artist handles per cluster index; they are not read again in here.
    handles = {}

    # Scatter the points of the clusters stored in the last entry.
    _last_centroids, final_clusters = all_data[-1]
    for idx in final_clusters:
        shade = utils.get_color(idx / k)
        members = final_clusters[idx]
        if not members:
            # An empty cluster has nothing to draw.
            continue
        xs, ys = (list(axis) for axis in zip(*members))
        handles[idx] = ax.scatter(xs, ys, color=shade)

    # Transpose the history so that each item is the path of one centroid.
    centroid_history = [step_centroids for step_centroids, _ in all_data]
    for idx, path in enumerate(zip(*centroid_history)):
        shade = utils.get_color(idx / k)
        xs, ys = (list(axis) for axis in zip(*path))
        handles[idx] = ax.plot(xs, ys, color=shade, marker='X')

    fig.savefig(f'kmeans_change{name_suffix}')
    if not show:
        plt.close(fig)
        return
    plt.show()
 def calc_error(centroids, clusters, k):
    '''Return the mean distance between each point and its cluster centroid.

    For every cluster index i in range(k), the distance of each point in
    clusters[i] to centroids[i] is accumulated as the square root of
    utils.calc_length (which returns the sum of squared coordinate
    differences). The total is divided by the number of points found in all
    entries of clusters.

    Returns 0.0 when clusters holds no points at all, instead of dividing
    by zero.
    '''
    total = sum(
        np.sqrt(utils.calc_length(point, centroids[i]))
        for i in range(k)
        for point in clusters[i]
    )
    points_count = sum(len(clusters[n]) for n in clusters)
    if points_count == 0:
        # No assigned points: report a zero error rather than crash.
        return 0.0
    return total / points_count
 def plot_error_data(error_data):
@@ -83,10 +116,11 @@ def print_stats(k, data):
    m = np.mean(errs)
    std = np.std(errs)
    min_err = np.min(errs)
-    empty_clusters = [sum([1 for cluster in sample.values() if not cluster]) for sample in clusters]
+    empty_clusters = [
        sum([1 for cluster in sample.values() if not cluster]) for sample in clusters]
    empty_clusters_mean = sum(empty_clusters)/len(empty_clusters)
    empty_clusters_std = np.std(empty_clusters)
-    print(f'MSE={m}')
+    print(f'Średni błąd={m}')
    print(f'std={std}')
    print(f'min(err)={min_err}')
    print(f'Mean of empty clusters count={empty_clusters_mean}')
@@ -129,7 +163,7 @@ def kmeans(data, method, k):
    return kmeans_with_err
-def init_units(data, k, method='forgy'):  # TODO: Add k-units++ and Random Partition
+def init_units(data, k, method='forgy'):  # TODO: Add k-units++
    match method:
        case 'forgy':
            return sample(data, k)
--- a/zad3/ml_195642_zad3.odt
+++ b/zad3/ml_195642_zad3.odt
--- a/zad3/som.py
+++ b/zad3/som.py
@@ -5,25 +5,26 @@ from random import shuffle
 import numpy as np
-def find_bmu(som, x):
+def find_bmu(som, exhausted, x):
    '''Return the (g, h) index of the best matching unit (BMU) in the grid.

    The squared differences between x and every cell of som are summed over
    axis 2 and multiplied element-wise by exhausted, so a larger (positive)
    exhausted entry makes that cell less likely to be selected. The position
    of the smallest resulting value is returned as an index tuple.
    '''
    #wrong_dist_sq = np.asarray([u.calc_length(x, s) for s in som])
-    dist_sq = (np.square(som - x)).sum(axis=2)
+    dist_sq = exhausted * (np.square(som - x)).sum(axis=2)
    # argmin works on the flattened array; unravel_index maps the flat
    # position back to coordinates in dist_sq's shape.
    return np.unravel_index(np.argmin(dist_sq, axis=None), dist_sq.shape)
-def dist_comp(som, x):
+def dist_comp(som, exhausted, x):
    '''Return all grid cells ranked by their weighted distance value to x.

    Every cell (i, j) of som contributes one [(i, j), value] entry, where
    value is u.calc_length(x, som[i][j]) multiplied by exhausted[i][j]
    (u.calc_length is presumably the squared distance from utils - TODO
    confirm). The entries are returned sorted by value, smallest first.
    '''
    distsq = []
    for i in range(som.shape[0]):
        for j in range(som.shape[1]):
-            distsq.append([(i, j), u.calc_length(x, som[i][j])])
+            distsq.append([(i, j), exhausted[i][j] *
                          u.calc_length(x, som[i][j])])
    # Sort on the second element of each entry, i.e. the weighted value.
    return sorted(distsq, key=lambda x: x[1])
-# Update the weights of the SOM cells when given a single training example
+def update_weights(som, exhausted, train_ex, learn_rate, radius_sq,
-# and the model parameters along with BMU coordinates as a tuple
+                   bmu_coord, algorithm):
-def update_weights(som, train_ex, learn_rate, radius_sq,
+    '''Update the weights of the SOM cells when given a single training example
-                   bmu_coord, algorithm, step=3):
+    and the model parameters along with BMU coordinates as a tuple'''
    g, h = bmu_coord
    # if radius is close to zero then only BMU is changed
    if radius_sq < 1e-3:
@@ -33,19 +34,19 @@ def update_weights(som, train_ex, learn_rate, radius_sq,
    match algorithm:
        case 'kohonen':
            # Update every cell, scaled by a Gaussian of its grid distance to the BMU
-            for i in range(max(0, g-step), min(som.shape[0], g+step)):
+            for i in range(som.shape[0]):
-                for j in range(max(0, h-step), min(som.shape[1], h+step)):
+                for j in range(som.shape[1]):
                    dist_sq = np.square(i - g) + np.square(j - h)
                    dist_func = np.exp(-dist_sq / 2 / radius_sq)
                    som[i, j, :] += learn_rate * \
                        dist_func * (train_ex - som[i, j, :])
        case 'neuron gas':
-            dist_rank = dist_comp(som, train_ex)
+            dist_rank = dist_comp(som, exhausted, train_ex)
            for i in range(len(dist_rank)):
                dist_func = np.exp(-i / 2 / np.sqrt(radius_sq))
-                som[dist_rank[i][0], dist_rank[i][1], :] += \
+                som[dist_rank[i][0][0], dist_rank[i][0][1], :] += \
                    learn_rate * dist_func * \
-                    (train_ex - som[dist_rank[i][0], dist_rank[i][1], :])
+                    (train_ex - som[dist_rank[i][0][0], dist_rank[i][0][1], :])
        case _:
            raise NotImplementedError(
@@ -57,29 +58,33 @@ def train_som(som, train_data, learn_rate=.1, radius_sq=1,
              lr_decay=.1, radius_decay=.1, epochs=20, algorithm='kohonen'):
    '''Main routine for training an SOM. It requires an initialized SOM grid
    or a partially trained grid as parameter'''
    exhausted = np.ones((som.shape[0], som.shape[1]))
    learn_rate_0 = learn_rate
    radius_0 = radius_sq
-    soms_with_error = [(som.copy(), calc_som_error(som, train_data))]
+    soms_with_error = [
        (som.copy(), calc_som_error(som, exhausted,  train_data))]
    for epoch in np.arange(epochs):
        shuffle(train_data)
        for train_ex in train_data:
-            g, h = find_bmu(som, train_ex)
+            g, h = find_bmu(som, exhausted, train_ex)
-            som = update_weights(som, train_ex,
+            som = update_weights(som, exhausted, train_ex,
                                 learn_rate, radius_sq, (g, h), algorithm)
            exhausted[g][h] += 1
        # Update learning rate and radius
        learn_rate = learn_rate_0 * np.exp(-epoch * lr_decay)
        radius_sq = radius_0 * np.exp(-epoch * radius_decay)
-        error = calc_som_error(som, train_data)
+        exhausted = np.ones((som.shape[0], som.shape[1]))
        error = calc_som_error(som, exhausted, train_data)
        soms_with_error.append((som.copy(), error))
        if error < 1e-3:
            break
    return soms_with_error
-def calc_som_error(som, train_data):
+def calc_som_error(som, exhausted, train_data):
    '''Return the mean distance between training examples and their BMUs.

    For every example the best matching unit is looked up with find_bmu and
    u.calc_length between the example and that cell's weights is collected.
    The square root of each collected value is taken before averaging, which
    presumes u.calc_length yields a squared distance (TODO confirm that u is
    the utils module).
    '''
    errors = []
    for train_ex in train_data:
-        g, h = find_bmu(som, train_ex)
+        g, h = find_bmu(som, exhausted, train_ex)
        errors.append(u.calc_length(train_ex, som[g][h]))
    return np.mean(np.sqrt(np.asarray(errors)))
@@ -135,6 +140,8 @@ def init_neurons(data, k, rand: np.random.RandomState = None, method='random'):
 def print_som_stats(soms_with_errors, train_data):
    print('=' * 20)
    exhausted = np.ones(
        (soms_with_errors[0][0].shape[0], soms_with_errors[0][0].shape[1]))
    soms, errs = zip(*soms_with_errors)
    m = np.mean(errs)
    std = np.std(errs)
@@ -142,7 +149,7 @@ def print_som_stats(soms_with_errors, train_data):
    dead_neurons_count = []
    for som in soms:
        dead_neurons_count.append(
-            20-len(set([find_bmu(som, x) for x in train_data])))
+            20-len(set([find_bmu(som, exhausted, x) for x in train_data])))
    print("Średni błąd: ", m)
    print("Odchylenie standardowe: ", std)
    print("Błąd minimalny: ", min_err)
@@ -150,3 +157,4 @@ def print_som_stats(soms_with_errors, train_data):
        f'Średnia liczba nieaktywnych neuronów: {np.mean(dead_neurons_count)}')
    print(
        f'Odchylenie standardowe liczby nieaktywnych neuronów: {np.std(dead_neurons_count)}')
    print('=' * 20)
--- a/zad3/utils.py
+++ b/zad3/utils.py
@@ -9,7 +9,7 @@ def get_color(i):
 def calc_length(a, b):
    '''Return the squared Euclidean distance between points a and b.

    No square root is taken: the result is the sum of the squared
    coordinate differences, so callers that need the actual distance have
    to apply np.sqrt to the returned value themselves.
    '''
    # NOTE(review): assert statements are skipped when Python runs with -O,
    # so this length check is a debugging aid rather than real validation.
-    assert len(a)==len(b)
+    assert len(a) == len(b)
    return np.square(np.asarray(b)-np.asarray(a)).sum()
@@ -26,7 +26,7 @@ def plot_data(data):
    plt.show()
-def plot_error_data(error_data):
+def plot_error_data(error_data, fname=None):
    fig, ax = plt.subplots()
    ax.set_xlabel('k')
    ax.set_ylabel('err')
@@ -39,7 +39,10 @@ def plot_error_data(error_data):
    lst_y = list(lst_y)
    ax.plot(lst_x, lst_y, 'ro-')
-    plt.show()
+    if fname:
        plt.savefig(fname)
    else:
        plt.show()
 def get_data1():
--- a/zad3/zad3.py
+++ b/zad3/zad3.py
@@ -6,6 +6,9 @@ import json
 # Initialization methods tried for k-means.
 METHODS = ['forgy', 'random_partition']
 # Initialization methods and weight-update algorithms tried for the SOM.
 SOM_INIT_METHODS = ['random', 'zeros']
 SOM_ALGORITHMS = ['kohonen', 'neuron gas']
 # (learn_rate, radius_sq) pairs passed to som.train_som. Every pair must be
 # unique: main() uses SOM_PARAMETERS_SETS.index(param_set) in the name of
 # the saved error plot, so duplicates would write to the same file.
 SOM_PARAMETERS_SETS = [
     (.1, .1), (.1, .5), (.1, 1),
     (.33, .1), (.33, .5), (.33, 1),
     (.66, .1), (.66, .5), (.66, 1),
     (.99, .1), (.99, .5), (.99, 1),
 ]
 def get_datas_from_json():
@@ -26,33 +29,46 @@ def get_datas_random():
 def main():
    datas = get_datas_from_json()
    benchmark_errors = False
    rand = np.random.RandomState(0)
    index = 1
    print("Self-organizing map")
    for data in datas:
        print(f'Data set: {index}')
        utils.plot_data(data)
-        for method in SOM_INIT_METHODS:
+        for algorithm in SOM_ALGORITHMS:
-            print(f'Initialization method: {method}')
+            print(f'Weights update algorithm: {algorithm}')
-            errors = []
+            for method in SOM_INIT_METHODS:
-            for k in range(2, 21, 2):
+                print(f'Initialization method: {method}')
-                som_data = som.init_neurons(data, k, rand, method)
+                for param_set in SOM_PARAMETERS_SETS:
-                soms_with_error = som.train_som(som_data, data, algorithm='kohonen')
+                    print(
-                error = soms_with_error[-1][1]
+                        f'Learn rate: {param_set[0]},  Radius square: {param_set[1]}')
-                errors.append((k, error))
+                    errors = {}
-                soms,_ = zip(*soms_with_error)
+                    for k in range(2, 21):
-                #som.plot_with_data(soms, data, f'_{method}_{k}_data{index}')
+                        som_data = som.init_neurons(data, k, rand, method)
-            utils.plot_error_data(errors)
+                        soms_with_error = som.train_som(som_data, data, learn_rate=param_set[0], radius_sq=param_set[1],
-            soms_with_errors = []
+                                                        algorithm=algorithm)
-            for _ in range(100):
+                        error = soms_with_error[-1][1]
-                som_data = som.init_neurons(data, k, rand, method)
+                        errors[k] = error
-                soms_with_error = som.train_som(som_data, data, algorithm='kohonen')
+                        soms, _ = zip(*soms_with_error)
-                soms_with_errors.append(soms_with_error[-1])
+                        som.plot_with_data(
-            som.print_som_stats(soms_with_errors, data)
+                            soms, data, f'_LR{param_set[0]}_RSQ{param_set[1]}_{algorithm}_{method}_neurons{k}_data{index}')
                    if all([i in errors for i in range(2, 21, 2)]):
                        fname = f'som_errors_data{index}_{SOM_PARAMETERS_SETS.index(param_set)}_{algorithm}_{method}.png'
                        utils.plot_error_data([(k, errors[k]) for k in range(2, 21, 2)], fname=fname)
                if benchmark_errors:
                    soms_with_errors = []
                    for _ in range(100):
                        som_data = som.init_neurons(data, 20, rand, method)
                        soms_with_error = som.train_som(
                            som_data, data, algorithm=algorithm)
                        soms_with_errors.append(soms_with_error[-1])
                    som.print_som_stats(soms_with_errors, data)
        index += 1
    index = 1
    for data in datas:
        print(f'Data set {index}')
        utils.plot_data(data)
        for method in METHODS:
            print(f'Method: {method}')
@@ -60,7 +76,7 @@ def main():
            for k in range(2, 21):
                kmeans_with_err = km.kmeans(data, method, k)
                km.print_stats(k, [(iterations[-1], err)
-                               for iterations, err in kmeans_with_err])
+                                   for iterations, err in kmeans_with_err])
                min_err = kmeans_with_err[0][1]
                kmeans = kmeans_with_err[0][0]
                for temp_kmeans, err in kmeans_with_err:
@@ -69,9 +85,12 @@ def main():
                        kmeans = temp_kmeans
                kmeans_data[k] = (kmeans, min_err)
                km.plot_kmeans(kmeans, k, f'_{method}_{k}_{index}')
-            error_data = [[i, kmeans_data[i][1]] for i in range(2, 21, 2)]
+                if k in [2, 10]:
-            utils.plot_error_data(error_data)
+                    km.plot_kmeans_change(kmeans, k, f'_{method}_{k}_{index}')
-        index += 1
+            if all([i in kmeans_data for i in range(2, 21, 2)]):
                error_data = [[i, kmeans_data[i][1]] for i in range(2, 21, 2)]
                utils.plot_error_data(error_data)
            index += 1
 if __name__ == '__main__':