Zad2 skończone

This commit is contained in:
2021-12-20 09:37:15 +01:00
parent 2efbd423ad
commit fc5a5d8599
25 changed files with 352 additions and 25 deletions
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+131 -24
View File
@@ -3,7 +3,11 @@ Komputerowa analiza danych
Zadanie 2
Michał Leśniak 195642
"""
from math import sin
from statistics import mean
import matplotlib.pyplot as plt
from chi2_normality import chi2normality_describe
import numpy as np
def var(lst):
@@ -30,41 +34,144 @@ def load_data(*args):
return ret
def model1(data):
    """Split two-column ``data`` rows into separate x and y lists."""
    # NOTE(review): as visible here the function only builds these two lists
    # and implicitly returns None; the remainder of its body may have been
    # displaced by the diff rendering -- confirm against the real file.
    lst_x = [x for x, _ in data]
    lst_y = [y for _, y in data]
def reglin(data, name, model):
    """Fit one regression model to ``data``, print a report and show plots.

    Args:
        data: Sequence of rows. Rows of length 2 are drawn as a 2D scatter
            with the fitted line; rows of length 3 are drawn as a 3D scatter.
            The last element of each row is treated as the observed value.
        name: Label of the data set, used in the printed report and titles.
        model: ``(model_func, use_reglinw, func_str)`` tuple. ``model_func``
            is handed to ``reglinw`` when ``use_reglinw`` is true, otherwise
            to ``reglinp``; ``func_str`` is the formula shown to the user.

    Raises:
        RuntimeError: If the rows have neither 2 nor 3 elements. The textual
            report has already been printed at that point.
    """
    model_func, use_reglinw, func_str = model
    fit = reglinw if use_reglinw else reglinp
    # Y holds the observed values and Z the fitted ones; both are indexed
    # below as single-column arrays (one row per sample).
    Y, Z, param_str = fit(data, model_func)

    err = Y - Z
    lst_err = np.transpose(err)[0].tolist()
    lst_y = np.transpose(Y)[0].tolist()
    lst_z = np.transpose(Z)[0].tolist()

    mse = mean([e**2 for e in lst_err])
    md = max([abs(e) for e in lst_err])
    var_err = var(lst_err)
    var_y = var(lst_y)
    r2 = 1 - (var_err / var_y)

    if len(data[0]) > 2:
        print(f'Regresja liniowa wielu zmiennych dla {name}:')
    else:
        print(f'Prosta regresja liniowa jednej zmiennej dla {name}:')
    print(func_str)
    print(param_str)
    print(f'MSE={mse}')
    print(f'maxD={md}')
    print(f'VarErr<=VarY - {var_err<=var_y}')
    print(f'r2={r2}')
    # Helper imported from the project-local chi2_normality module; it
    # receives the residuals (its exact output is not visible from here).
    chi2normality_describe(lst_err)

    row_len = len(data[0])
    if row_len == 2:  # 2D: data points plus the fitted curve
        xs, ys = (list(col) for col in zip(*data))
        plt.figure(1)
        ax = plt.axes()
        ax.scatter(xs, ys)
        ax.plot(xs, lst_z, 'r-')
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        plt.grid(True)
    elif row_len == 3:  # 3D: observed points and fitted points (red)
        xs1, xs2, ys = (list(col) for col in zip(*data))
        plt.figure(1)
        ax = plt.axes(projection='3d')
        ax.scatter(xs1, xs2, ys)
        ax.scatter(xs1, xs2, lst_z, color='r')
        ax.set_xlabel('X1')
        ax.set_ylabel('X2')
        ax.set_zlabel('Y')
    else:
        raise RuntimeError(
            f'cannot plot rows with {row_len} values; expected 2 or 3')
    plt.title(f'{name}\n{func_str}')

    # Second figure: distribution of the residuals.
    plt.figure(2)
    plt.hist(err, 50)
    plt.xlabel('Err')
    plt.title(f'Histogram Err dla {name}\n{func_str}')
    plt.grid(True)
    plt.show()
def model2(data):
    """Split two-column ``data`` rows into separate x and y lists."""
    # NOTE(review): as visible here the function only builds these two lists
    # and implicitly returns None; the remainder of its body may have been
    # displaced by the diff rendering -- confirm against the real file.
    lst_x = [x for x, _ in data]
    lst_y = [y for _, y in data]
def reglinp(data, model_func):
    """Run a closed-form single-variable model on two-column ``data``.

    The rows of ``data`` are transposed into an x list and a y list, which
    are passed to ``model_func(lst_x, lst_y)``; its result is returned as is.
    """
    xs, ys = (list(column) for column in zip(*data))
    return model_func(xs, ys)
def reglinw(data, prepare_data):
    """Least-squares fit via the normal equations ``A = (X^T X)^-1 X^T Y``.

    Args:
        data: Raw rows, passed unchanged to ``prepare_data``.
        prepare_data: Callable returning ``(X, Y)`` where ``X`` is the design
            matrix and ``Y`` the column of observed values.

    Returns:
        ``(Y, Z, param_str)``: the observed values, the fitted values
        ``X @ A`` and a newline-separated ``"<letter> = <value>"`` listing of
        the coefficients.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` cannot be inverted; a message
            is printed before the exception is re-raised.
    """
    X, Y = prepare_data(data)
    XT = np.transpose(X)
    XTX = np.matmul(XT, X)
    try:
        inv_XTX = np.linalg.inv(XTX)
    except np.linalg.LinAlgError:
        print("XTX is not inversible")
        raise
    A = np.matmul(np.matmul(inv_XTX, XT), Y)
    Z = np.matmul(X, A)

    params = [row[0] for row in A]
    # Rotate so the coefficient of the first design column comes last and
    # therefore receives the last letter in the listing.
    params = params[1:] + params[:1]
    param_str = '\n'.join(
        f'{chr(ord("a") + i)} = {value}' for i, value in enumerate(params))
    return Y, Z, param_str
def model_func1(lst_x, lst_y):
    """Fit the no-intercept model f(X) = a*X.

    The slope is mean(x*y) / mean(x^2). Returns the observed values and the
    fitted values as single-column arrays, plus the text ``"a = <slope>"``.
    """
    products = [lst_y[i] * x for i, x in enumerate(lst_x)]
    squares = [x**2 for x in lst_x]
    a = mean(products) / mean(squares)

    observed = np.array([[y] for y in lst_y])
    fitted = np.array([[a * x] for x in lst_x])
    return observed, fitted, f'a = {a}'
def model_func2(lst_x, lst_y):
    """Fit the simple linear model f(X) = a*X + b.

    The slope is cov(x, y) / var(x) and the intercept is
    mean(y) - a * mean(x). Returns the observed values and the fitted values
    as single-column arrays, plus the text ``"a = <a>\\nb = <b>"``.

    The parameters are reported only through the returned string, matching
    ``model_func1``; nothing is printed here.
    """
    a = cov(lst_x, lst_y) / var(lst_x)
    b = mean(lst_y) - a * mean(lst_x)
    Y = np.array([[y] for y in lst_y])
    Z = np.array([[a * x + b] for x in lst_x])
    return Y, Z, f'a = {a}\nb = {b}'
def model_func3(data):
    """Build the design matrix and target column for a*X^2 + b*sin(X) + c.

    Each ``(x, y)`` row yields the design row ``[1.0, x**2, sin(x)]`` and the
    target row ``[y]``. Returns ``(X, Y)`` as NumPy arrays.
    """
    design = [[1.0, x**2, sin(x)] for x, _ in data]
    target = [[y] for _, y in data]
    return np.array(design), np.array(target)
def model_func4(data):
    """Build the design matrix and target column for a*X1 + b*X2 + c.

    Each ``(x1, x2, y)`` row yields the design row ``[1.0, x1, x2]`` and the
    target row ``[y]``. Returns ``(X, Y)`` as NumPy arrays.
    """
    design = [[1.0, x1, x2] for x1, x2, _ in data]
    target = [[y] for _, _, y in data]
    return np.array(design), np.array(target)
def model_func5(data):
    """Build the design matrix and target column for the full quadratic model.

    Each ``(x1, x2, y)`` row yields the design row
    ``[1.0, x1**2, x1*x2, x2**2, x1, x2]`` and the target row ``[y]``.
    Returns ``(X, Y)`` as NumPy arrays.
    """
    design = [
        [1.0, x1**2, x1 * x2, x2**2, x1, x2]
        for x1, x2, _ in data
    ]
    target = [[y] for _, _, y in data]
    return np.array(design), np.array(target)
# Registry of the regression models run by main(). Each entry is a
# (model_func, use_reglinw, func_str) tuple, unpacked in that order by
# reglin():
#   model_func  - closed-form fitter (called via reglinp) or design-matrix
#                 builder (called via reglinw)
#   use_reglinw - True when model_func must be handed to reglinw
#   func_str    - formula text printed in the report and used in plot titles
# For the reglinw models the coefficient of the constant 1.0 column is listed
# last, which is why the free term carries the last letter in each formula.
MODELS = [
    (model_func1, False, '$f(X) = aX$'),
    (model_func2, False, '$f(X) = aX + b$'),
    (model_func3, True, '$f(X) = aX^2 + bsin(X) + c$'),
    (model_func4, True, '$f(X_1, X_2) = aX_1 + bX_2 + c$'),
    (model_func5, True,
     r'$f(X_1, X_2) = a{X_1}^2 + bX_1 X_2 + c{X_2}^2 +dX_1 +eX_2 +f$')
]
def main():
    """Load the four CSV data sets and run every applicable model on them.

    The first three entries of ``MODELS`` are applied to ``data1.csv`` and
    ``data2.csv``; the remaining two to ``data3.csv`` and ``data4.csv``.
    Before that, a few diagnostic values for ``data1.csv`` are printed.
    """
    data1, data2, data3, data4 = load_data(
        'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')

    print(var([x for x, _ in data1]))
    print(cov([x for x, _ in data1], [y for _, y in data1]))

    # NOTE(review): model1/model2 look like leftovers from the previous
    # version of this script; kept so behavior is unchanged -- confirm
    # whether these calls are still wanted.
    model1(data1)
    model1(data2)
    model2(data1)
    model2(data2)

    # Diagnostic values for data1; xy / x_2 is the slope of the
    # no-intercept fit (sum(x*y) / sum(x^2)).
    xy = sum(x * y for x, y in data1)
    x_2 = sum(x**2 for x, _ in data1)
    print(sum([2*x-2*x*y for x, y in data1])/len(data1))
    print(xy/x_2)

    # Single-variable models on the two-column data sets.
    for model in MODELS[:3]:
        reglin(data1, 'data1.csv', model)
        reglin(data2, 'data2.csv', model)
    # Two-variable models on the three-column data sets.
    for model in MODELS[3:5]:
        reglin(data3, 'data3.csv', model)
        reglin(data4, 'data4.csv', model)
if __name__ == '__main__':