[Zad 3] ZALICZONE
This commit is contained in:
parent
14779630f3
commit
cb085dc5d5
15
zad3/.vscode/launch.json
vendored
Normal file
15
zad3/.vscode/launch.json
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Current File",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "zad3.py",
|
||||||
|
"console": "integratedTerminal"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
BIN
zad3/bz_236713_ml_195642_zad3.pdf
(Stored with Git LFS)
Normal file
BIN
zad3/bz_236713_ml_195642_zad3.pdf
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
zad3/data1_errors.png
(Stored with Git LFS)
BIN
zad3/data1_errors.png
(Stored with Git LFS)
Binary file not shown.
BIN
zad3/data2_errors.png
(Stored with Git LFS)
BIN
zad3/data2_errors.png
(Stored with Git LFS)
Binary file not shown.
@ -50,14 +50,47 @@ def plot_kmeans(all_data, k, name_suffix):
|
|||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def plot_kmeans_change(all_data, k, name_suffix, show=True):
|
||||||
|
fig, ax = plt.subplots()
|
||||||
|
ax.set_xlabel('X')
|
||||||
|
ax.set_ylabel('Y')
|
||||||
|
ax.set_title(f'k={k}')
|
||||||
|
|
||||||
|
plt.grid(True)
|
||||||
|
cluster_scatters = {}
|
||||||
|
_, clusters = all_data[-1]
|
||||||
|
for key in clusters:
|
||||||
|
color = utils.get_color(key / k)
|
||||||
|
if clusters[key]:
|
||||||
|
lst_x, lst_y = zip(*clusters[key])
|
||||||
|
lst_x = list(lst_x)
|
||||||
|
lst_y = list(lst_y)
|
||||||
|
cluster_scatters[key] = ax.scatter(lst_x, lst_y, color=color)
|
||||||
|
all_centroids, _ = zip(*all_data)
|
||||||
|
key = 0
|
||||||
|
for centroids in zip(*all_centroids):
|
||||||
|
color = utils.get_color(key / k)
|
||||||
|
lst_x, lst_y = zip(*centroids)
|
||||||
|
lst_x = list(lst_x)
|
||||||
|
lst_y = list(lst_y)
|
||||||
|
cluster_scatters[key] = ax.plot(
|
||||||
|
lst_x, lst_y, color=color, marker='X')
|
||||||
|
key += 1
|
||||||
|
|
||||||
|
fig.savefig(f'kmeans_change{name_suffix}')
|
||||||
|
if show:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
def calc_error(centroids, clusters, k):
|
def calc_error(centroids, clusters, k):
|
||||||
squared_errors = []
|
errors = 0
|
||||||
for i in range(k):
|
for i in range(k):
|
||||||
cluster = np.array(clusters[i])
|
for point in clusters[i]:
|
||||||
centroid = np.array([centroids[i] for _ in range(len(cluster))])
|
errors += np.sqrt(utils.calc_length(point, centroids[i]))
|
||||||
errors = cluster - centroid
|
points_count = sum([len(clusters[n]) for n in clusters])
|
||||||
squared_errors.append([e ** 2 for e in errors])
|
return errors/points_count
|
||||||
return sum([np.mean(err) if err else 0 for err in squared_errors])
|
|
||||||
|
|
||||||
|
|
||||||
def plot_error_data(error_data):
|
def plot_error_data(error_data):
|
||||||
@ -83,10 +116,11 @@ def print_stats(k, data):
|
|||||||
m = np.mean(errs)
|
m = np.mean(errs)
|
||||||
std = np.std(errs)
|
std = np.std(errs)
|
||||||
min_err = np.min(errs)
|
min_err = np.min(errs)
|
||||||
empty_clusters = [sum([1 for cluster in sample.values() if not cluster]) for sample in clusters]
|
empty_clusters = [
|
||||||
|
sum([1 for cluster in sample.values() if not cluster]) for sample in clusters]
|
||||||
empty_clusters_mean = sum(empty_clusters)/len(empty_clusters)
|
empty_clusters_mean = sum(empty_clusters)/len(empty_clusters)
|
||||||
empty_clusters_std = np.std(empty_clusters)
|
empty_clusters_std = np.std(empty_clusters)
|
||||||
print(f'MSE={m}')
|
print(f'Średni błąd={m}')
|
||||||
print(f'std={std}')
|
print(f'std={std}')
|
||||||
print(f'min(err)={min_err}')
|
print(f'min(err)={min_err}')
|
||||||
print(f'Mean of empty clusters count={empty_clusters_mean}')
|
print(f'Mean of empty clusters count={empty_clusters_mean}')
|
||||||
@ -129,7 +163,7 @@ def kmeans(data, method, k):
|
|||||||
return kmeans_with_err
|
return kmeans_with_err
|
||||||
|
|
||||||
|
|
||||||
def init_units(data, k, method='forgy'): # TODO: Add k-units++ and Random Partition
|
def init_units(data, k, method='forgy'): # TODO: Add k-units++
|
||||||
match method:
|
match method:
|
||||||
case 'forgy':
|
case 'forgy':
|
||||||
return sample(data, k)
|
return sample(data, k)
|
||||||
|
BIN
zad3/ml_195642_zad3.odt
(Stored with Git LFS)
BIN
zad3/ml_195642_zad3.odt
(Stored with Git LFS)
Binary file not shown.
48
zad3/som.py
48
zad3/som.py
@ -5,25 +5,26 @@ from random import shuffle
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
def find_bmu(som, x):
|
def find_bmu(som, exhausted, x):
|
||||||
'''Return the (g,h) index of the BMU in the grid'''
|
'''Return the (g,h) index of the BMU in the grid'''
|
||||||
#wrong_dist_sq = np.asarray([u.calc_length(x, s) for s in som])
|
#wrong_dist_sq = np.asarray([u.calc_length(x, s) for s in som])
|
||||||
dist_sq = (np.square(som - x)).sum(axis=2)
|
dist_sq = exhausted * (np.square(som - x)).sum(axis=2)
|
||||||
return np.unravel_index(np.argmin(dist_sq, axis=None), dist_sq.shape)
|
return np.unravel_index(np.argmin(dist_sq, axis=None), dist_sq.shape)
|
||||||
|
|
||||||
|
|
||||||
def dist_comp(som, x):
|
def dist_comp(som, exhausted, x):
|
||||||
distsq = []
|
distsq = []
|
||||||
for i in range(som.shape[0]):
|
for i in range(som.shape[0]):
|
||||||
for j in range(som.shape[1]):
|
for j in range(som.shape[1]):
|
||||||
distsq.append([(i, j), u.calc_length(x, som[i][j])])
|
distsq.append([(i, j), exhausted[i][j] *
|
||||||
|
u.calc_length(x, som[i][j])])
|
||||||
return sorted(distsq, key=lambda x: x[1])
|
return sorted(distsq, key=lambda x: x[1])
|
||||||
|
|
||||||
|
|
||||||
# Update the weights of the SOM cells when given a single training example
|
def update_weights(som, exhausted, train_ex, learn_rate, radius_sq,
|
||||||
# and the model parameters along with BMU coordinates as a tuple
|
bmu_coord, algorithm):
|
||||||
def update_weights(som, train_ex, learn_rate, radius_sq,
|
'''Update the weights of the SOM cells when given a single training example
|
||||||
bmu_coord, algorithm, step=3):
|
and the model parameters along with BMU coordinates as a tuple'''
|
||||||
g, h = bmu_coord
|
g, h = bmu_coord
|
||||||
# if radius is close to zero then only BMU is changed
|
# if radius is close to zero then only BMU is changed
|
||||||
if radius_sq < 1e-3:
|
if radius_sq < 1e-3:
|
||||||
@ -33,19 +34,19 @@ def update_weights(som, train_ex, learn_rate, radius_sq,
|
|||||||
match algorithm:
|
match algorithm:
|
||||||
case 'kohonen':
|
case 'kohonen':
|
||||||
# Change all cells in a neighborhood of BMU
|
# Change all cells in a neighborhood of BMU
|
||||||
for i in range(max(0, g-step), min(som.shape[0], g+step)):
|
for i in range(som.shape[0]):
|
||||||
for j in range(max(0, h-step), min(som.shape[1], h+step)):
|
for j in range(som.shape[1]):
|
||||||
dist_sq = np.square(i - g) + np.square(j - h)
|
dist_sq = np.square(i - g) + np.square(j - h)
|
||||||
dist_func = np.exp(-dist_sq / 2 / radius_sq)
|
dist_func = np.exp(-dist_sq / 2 / radius_sq)
|
||||||
som[i, j, :] += learn_rate * \
|
som[i, j, :] += learn_rate * \
|
||||||
dist_func * (train_ex - som[i, j, :])
|
dist_func * (train_ex - som[i, j, :])
|
||||||
case 'neuron gas':
|
case 'neuron gas':
|
||||||
dist_rank = dist_comp(som, train_ex)
|
dist_rank = dist_comp(som, exhausted, train_ex)
|
||||||
for i in range(len(dist_rank)):
|
for i in range(len(dist_rank)):
|
||||||
dist_func = np.exp(-i / 2 / np.sqrt(radius_sq))
|
dist_func = np.exp(-i / 2 / np.sqrt(radius_sq))
|
||||||
som[dist_rank[i][0], dist_rank[i][1], :] += \
|
som[dist_rank[i][0][0], dist_rank[i][0][1], :] += \
|
||||||
learn_rate * dist_func * \
|
learn_rate * dist_func * \
|
||||||
(train_ex - som[dist_rank[i][0], dist_rank[i][1], :])
|
(train_ex - som[dist_rank[i][0][0], dist_rank[i][0][1], :])
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
@ -57,29 +58,33 @@ def train_som(som, train_data, learn_rate=.1, radius_sq=1,
|
|||||||
lr_decay=.1, radius_decay=.1, epochs=20, algorithm='kohonen'):
|
lr_decay=.1, radius_decay=.1, epochs=20, algorithm='kohonen'):
|
||||||
'''Main routine for training an SOM. It requires an initialized SOM grid
|
'''Main routine for training an SOM. It requires an initialized SOM grid
|
||||||
or a partially trained grid as parameter'''
|
or a partially trained grid as parameter'''
|
||||||
|
exhausted = np.ones((som.shape[0], som.shape[1]))
|
||||||
learn_rate_0 = learn_rate
|
learn_rate_0 = learn_rate
|
||||||
radius_0 = radius_sq
|
radius_0 = radius_sq
|
||||||
soms_with_error = [(som.copy(), calc_som_error(som, train_data))]
|
soms_with_error = [
|
||||||
|
(som.copy(), calc_som_error(som, exhausted, train_data))]
|
||||||
for epoch in np.arange(epochs):
|
for epoch in np.arange(epochs):
|
||||||
shuffle(train_data)
|
shuffle(train_data)
|
||||||
for train_ex in train_data:
|
for train_ex in train_data:
|
||||||
g, h = find_bmu(som, train_ex)
|
g, h = find_bmu(som, exhausted, train_ex)
|
||||||
som = update_weights(som, train_ex,
|
som = update_weights(som, exhausted, train_ex,
|
||||||
learn_rate, radius_sq, (g, h), algorithm)
|
learn_rate, radius_sq, (g, h), algorithm)
|
||||||
|
exhausted[g][h] += 1
|
||||||
# Update learning rate and radius
|
# Update learning rate and radius
|
||||||
learn_rate = learn_rate_0 * np.exp(-epoch * lr_decay)
|
learn_rate = learn_rate_0 * np.exp(-epoch * lr_decay)
|
||||||
radius_sq = radius_0 * np.exp(-epoch * radius_decay)
|
radius_sq = radius_0 * np.exp(-epoch * radius_decay)
|
||||||
error = calc_som_error(som, train_data)
|
exhausted = np.ones((som.shape[0], som.shape[1]))
|
||||||
|
error = calc_som_error(som, exhausted, train_data)
|
||||||
soms_with_error.append((som.copy(), error))
|
soms_with_error.append((som.copy(), error))
|
||||||
if error < 1e-3:
|
if error < 1e-3:
|
||||||
break
|
break
|
||||||
return soms_with_error
|
return soms_with_error
|
||||||
|
|
||||||
|
|
||||||
def calc_som_error(som, train_data):
|
def calc_som_error(som, exhausted, train_data):
|
||||||
errors = []
|
errors = []
|
||||||
for train_ex in train_data:
|
for train_ex in train_data:
|
||||||
g, h = find_bmu(som, train_ex)
|
g, h = find_bmu(som, exhausted, train_ex)
|
||||||
errors.append(u.calc_length(train_ex, som[g][h]))
|
errors.append(u.calc_length(train_ex, som[g][h]))
|
||||||
return np.mean(np.sqrt(np.asarray(errors)))
|
return np.mean(np.sqrt(np.asarray(errors)))
|
||||||
|
|
||||||
@ -135,6 +140,8 @@ def init_neurons(data, k, rand: np.random.RandomState = None, method='random'):
|
|||||||
|
|
||||||
def print_som_stats(soms_with_errors, train_data):
|
def print_som_stats(soms_with_errors, train_data):
|
||||||
print('=' * 20)
|
print('=' * 20)
|
||||||
|
exhausted = np.ones(
|
||||||
|
(soms_with_errors[0][0].shape[0], soms_with_errors[0][0].shape[1]))
|
||||||
soms, errs = zip(*soms_with_errors)
|
soms, errs = zip(*soms_with_errors)
|
||||||
m = np.mean(errs)
|
m = np.mean(errs)
|
||||||
std = np.std(errs)
|
std = np.std(errs)
|
||||||
@ -142,7 +149,7 @@ def print_som_stats(soms_with_errors, train_data):
|
|||||||
dead_neurons_count = []
|
dead_neurons_count = []
|
||||||
for som in soms:
|
for som in soms:
|
||||||
dead_neurons_count.append(
|
dead_neurons_count.append(
|
||||||
20-len(set([find_bmu(som, x) for x in train_data])))
|
20-len(set([find_bmu(som, exhausted, x) for x in train_data])))
|
||||||
print("Średni błąd: ", m)
|
print("Średni błąd: ", m)
|
||||||
print("Odchylenie standardowe: ", std)
|
print("Odchylenie standardowe: ", std)
|
||||||
print("Błąd minimalny: ", min_err)
|
print("Błąd minimalny: ", min_err)
|
||||||
@ -150,3 +157,4 @@ def print_som_stats(soms_with_errors, train_data):
|
|||||||
f'Średnia liczba nieaktywnych neuronów: {np.mean(dead_neurons_count)}')
|
f'Średnia liczba nieaktywnych neuronów: {np.mean(dead_neurons_count)}')
|
||||||
print(
|
print(
|
||||||
f'Odchylenie standardowe liczby nieaktywnych neuronów: {np.std(dead_neurons_count)}')
|
f'Odchylenie standardowe liczby nieaktywnych neuronów: {np.std(dead_neurons_count)}')
|
||||||
|
print('=' * 20)
|
||||||
|
@ -9,7 +9,7 @@ def get_color(i):
|
|||||||
|
|
||||||
def calc_length(a, b):
|
def calc_length(a, b):
|
||||||
'''Calculate Euclidian distance between points'''
|
'''Calculate Euclidian distance between points'''
|
||||||
assert len(a)==len(b)
|
assert len(a) == len(b)
|
||||||
return np.square(np.asarray(b)-np.asarray(a)).sum()
|
return np.square(np.asarray(b)-np.asarray(a)).sum()
|
||||||
|
|
||||||
|
|
||||||
@ -26,7 +26,7 @@ def plot_data(data):
|
|||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
def plot_error_data(error_data):
|
def plot_error_data(error_data, fname=None):
|
||||||
fig, ax = plt.subplots()
|
fig, ax = plt.subplots()
|
||||||
ax.set_xlabel('k')
|
ax.set_xlabel('k')
|
||||||
ax.set_ylabel('err')
|
ax.set_ylabel('err')
|
||||||
@ -39,7 +39,10 @@ def plot_error_data(error_data):
|
|||||||
lst_y = list(lst_y)
|
lst_y = list(lst_y)
|
||||||
ax.plot(lst_x, lst_y, 'ro-')
|
ax.plot(lst_x, lst_y, 'ro-')
|
||||||
|
|
||||||
plt.show()
|
if fname:
|
||||||
|
plt.savefig(fname)
|
||||||
|
else:
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
def get_data1():
|
def get_data1():
|
||||||
|
61
zad3/zad3.py
61
zad3/zad3.py
@ -6,6 +6,9 @@ import json
|
|||||||
|
|
||||||
METHODS = ['forgy', 'random_partition']
|
METHODS = ['forgy', 'random_partition']
|
||||||
SOM_INIT_METHODS = ['random', 'zeros']
|
SOM_INIT_METHODS = ['random', 'zeros']
|
||||||
|
SOM_ALGORITHMS = ['kohonen', 'neuron gas']
|
||||||
|
SOM_PARAMETERS_SETS = [(.1, .5), (.1, .5), (.1, 1), (.33, .1), (.33, .5), (.33, 1), (.66, .1), (.66, .5), (.66, 1),
|
||||||
|
(.99, .1), (.99, .5), (.99, 1)]
|
||||||
|
|
||||||
|
|
||||||
def get_datas_from_json():
|
def get_datas_from_json():
|
||||||
@ -26,33 +29,46 @@ def get_datas_random():
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
datas = get_datas_from_json()
|
datas = get_datas_from_json()
|
||||||
|
benchmark_errors = False
|
||||||
rand = np.random.RandomState(0)
|
rand = np.random.RandomState(0)
|
||||||
index = 1
|
index = 1
|
||||||
print("Self-organizing map")
|
print("Self-organizing map")
|
||||||
for data in datas:
|
for data in datas:
|
||||||
print(f'Data set: {index}')
|
print(f'Data set: {index}')
|
||||||
utils.plot_data(data)
|
utils.plot_data(data)
|
||||||
for method in SOM_INIT_METHODS:
|
for algorithm in SOM_ALGORITHMS:
|
||||||
print(f'Initialization method: {method}')
|
print(f'Weights update algorithm: {algorithm}')
|
||||||
errors = []
|
for method in SOM_INIT_METHODS:
|
||||||
for k in range(2, 21, 2):
|
print(f'Initialization method: {method}')
|
||||||
som_data = som.init_neurons(data, k, rand, method)
|
for param_set in SOM_PARAMETERS_SETS:
|
||||||
soms_with_error = som.train_som(som_data, data, algorithm='kohonen')
|
print(
|
||||||
error = soms_with_error[-1][1]
|
f'Learn rate: {param_set[0]}, Radius square: {param_set[1]}')
|
||||||
errors.append((k, error))
|
errors = {}
|
||||||
soms,_ = zip(*soms_with_error)
|
for k in range(2, 21):
|
||||||
#som.plot_with_data(soms, data, f'_{method}_{k}_data{index}')
|
som_data = som.init_neurons(data, k, rand, method)
|
||||||
utils.plot_error_data(errors)
|
soms_with_error = som.train_som(som_data, data, learn_rate=param_set[0], radius_sq=param_set[1],
|
||||||
soms_with_errors = []
|
algorithm=algorithm)
|
||||||
for _ in range(100):
|
error = soms_with_error[-1][1]
|
||||||
som_data = som.init_neurons(data, k, rand, method)
|
errors[k] = error
|
||||||
soms_with_error = som.train_som(som_data, data, algorithm='kohonen')
|
soms, _ = zip(*soms_with_error)
|
||||||
soms_with_errors.append(soms_with_error[-1])
|
som.plot_with_data(
|
||||||
som.print_som_stats(soms_with_errors, data)
|
soms, data, f'_LR{param_set[0]}_RSQ{param_set[1]}_{algorithm}_{method}_neurons{k}_data{index}')
|
||||||
|
if all([i in errors for i in range(2, 21, 2)]):
|
||||||
|
fname = f'som_errors_data{index}_{SOM_PARAMETERS_SETS.index(param_set)}_{algorithm}_{method}.png'
|
||||||
|
utils.plot_error_data([(k, errors[k]) for k in range(2, 21, 2)], fname=fname)
|
||||||
|
if benchmark_errors:
|
||||||
|
soms_with_errors = []
|
||||||
|
for _ in range(100):
|
||||||
|
som_data = som.init_neurons(data, 20, rand, method)
|
||||||
|
soms_with_error = som.train_som(
|
||||||
|
som_data, data, algorithm=algorithm)
|
||||||
|
soms_with_errors.append(soms_with_error[-1])
|
||||||
|
som.print_som_stats(soms_with_errors, data)
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
index = 1
|
index = 1
|
||||||
for data in datas:
|
for data in datas:
|
||||||
|
print(f'Data set {index}')
|
||||||
utils.plot_data(data)
|
utils.plot_data(data)
|
||||||
for method in METHODS:
|
for method in METHODS:
|
||||||
print(f'Method: {method}')
|
print(f'Method: {method}')
|
||||||
@ -60,7 +76,7 @@ def main():
|
|||||||
for k in range(2, 21):
|
for k in range(2, 21):
|
||||||
kmeans_with_err = km.kmeans(data, method, k)
|
kmeans_with_err = km.kmeans(data, method, k)
|
||||||
km.print_stats(k, [(iterations[-1], err)
|
km.print_stats(k, [(iterations[-1], err)
|
||||||
for iterations, err in kmeans_with_err])
|
for iterations, err in kmeans_with_err])
|
||||||
min_err = kmeans_with_err[0][1]
|
min_err = kmeans_with_err[0][1]
|
||||||
kmeans = kmeans_with_err[0][0]
|
kmeans = kmeans_with_err[0][0]
|
||||||
for temp_kmeans, err in kmeans_with_err:
|
for temp_kmeans, err in kmeans_with_err:
|
||||||
@ -69,9 +85,12 @@ def main():
|
|||||||
kmeans = temp_kmeans
|
kmeans = temp_kmeans
|
||||||
kmeans_data[k] = (kmeans, min_err)
|
kmeans_data[k] = (kmeans, min_err)
|
||||||
km.plot_kmeans(kmeans, k, f'_{method}_{k}_{index}')
|
km.plot_kmeans(kmeans, k, f'_{method}_{k}_{index}')
|
||||||
error_data = [[i, kmeans_data[i][1]] for i in range(2, 21, 2)]
|
if k in [2, 10]:
|
||||||
utils.plot_error_data(error_data)
|
km.plot_kmeans_change(kmeans, k, f'_{method}_{k}_{index}')
|
||||||
index += 1
|
if all([i in kmeans_data for i in range(2, 21, 2)]):
|
||||||
|
error_data = [[i, kmeans_data[i][1]] for i in range(2, 21, 2)]
|
||||||
|
utils.plot_error_data(error_data)
|
||||||
|
index += 1
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
Reference in New Issue
Block a user