Zad2 skończone
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
+131
-24
@@ -3,7 +3,11 @@ Komputerowa analiza danych
|
||||
Zadanie 2
|
||||
Michał Leśniak 195642
|
||||
"""
|
||||
from math import sin
|
||||
from statistics import mean
|
||||
import matplotlib.pyplot as plt
|
||||
from chi2_normality import chi2normality_describe
|
||||
import numpy as np
|
||||
|
||||
|
||||
def var(lst):
|
||||
@@ -30,41 +34,144 @@ def load_data(*args):
|
||||
return ret
|
||||
|
||||
|
||||
def model1(data):
    """Split two-column ``data`` into separate x and y lists.

    Every row must unpack into exactly two values, otherwise ``ValueError``
    is raised. The lists are neither used nor returned, so the function
    always returns ``None``.
    """
    # NOTE(review): looks like a leftover of an earlier implementation;
    # confirm whether it is still needed.
    xs = []
    for x, _ in data:
        xs.append(x)
    ys = []
    for _, y in data:
        ys.append(y)
|
||||
def reglin(data, name, model):
    """Fit one regression model to ``data``, print its metrics and plot it.

    Args:
        data: Sequence of rows with either two values ``(x, y)`` or three
            values ``(x1, x2, y)``.
        name: Label of the data set, used in printed headers and plot titles.
        model: ``(model_func, use_reglinw, func_str)`` tuple. When
            ``use_reglinw`` is true the fit is delegated to ``reglinw``,
            otherwise to ``reglinp``; ``func_str`` is the formula shown in
            the output and in the plot titles.

    Raises:
        RuntimeError: If the rows have neither two nor three values.
    """
    model_func, use_reglinw, func_str = model
    if use_reglinw:
        Y, Z, param_str = reglinw(data, model_func)
    else:
        Y, Z, param_str = reglinp(data, model_func)

    # Y and Z are column vectors; flatten them (and the residuals) to lists.
    err = Y - Z
    lst_err = np.transpose(err)[0].tolist()
    lst_y = np.transpose(Y)[0].tolist()
    lst_z = np.transpose(Z)[0].tolist()

    mse = mean([e**2 for e in lst_err])
    md = max([abs(e) for e in lst_err])
    var_err = var(lst_err)
    var_y = var(lst_y)
    r2 = 1 - (var_err / var_y)

    n_columns = len(data[0])
    if n_columns > 2:
        print(f'Regresja liniowa wielu zmiennych dla {name}:')
    else:
        print(f'Prosta regresja liniowa jednej zmiennej dla {name}:')
    print(func_str)
    print(param_str)
    print(f'MSE={mse}')
    print(f'maxD={md}')
    print(f'VarErr<=VarY - {var_err<=var_y}')
    print(f'r2={r2}')
    chi2normality_describe(lst_err)

    if n_columns == 2:
        # 2D: measured points plus the fitted values drawn as a red line.
        xs, ys = map(list, zip(*data))
        plt.figure(1)
        ax = plt.axes()
        ax.scatter(xs, ys)
        ax.plot(xs, lst_z, 'r-')
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        plt.grid(True)
    elif n_columns == 3:
        # 3D: measured points plus the fitted values as red points.
        xs1, xs2, ys = map(list, zip(*data))
        plt.figure(1)
        ax = plt.axes(projection='3d')
        ax.scatter(xs1, xs2, ys)
        ax.scatter(xs1, xs2, lst_z, color='r')
        ax.set_xlabel('X1')
        ax.set_ylabel('X2')
        ax.set_zlabel('Y')
    else:
        raise RuntimeError
    plt.title(f'{name}\n{func_str}')

    # Second figure: histogram of the residuals in 50 bins.
    plt.figure(2)
    plt.hist(err, 50)
    plt.xlabel('Err')
    plt.title(f'Histogram Err dla {name}\n{func_str}')
    plt.grid(True)
    plt.show()
|
||||
|
||||
|
||||
def model2(data):
    """Split two-column ``data`` into separate x and y lists.

    Every row must unpack into exactly two values, otherwise ``ValueError``
    is raised. The lists are neither used nor returned, so the function
    always returns ``None``.
    """
    # NOTE(review): looks like a leftover of an earlier implementation;
    # confirm whether it is still needed.
    xs = []
    for x, _ in data:
        xs.append(x)
    ys = []
    for _, y in data:
        ys.append(y)
|
||||
def reglinp(data, model_func):
    """Run a single-variable model function on two-column ``data``.

    Args:
        data: Sequence of ``(x, y)`` rows.
        model_func: Callable taking the list of x values and the list of
            y values.

    Returns:
        Whatever ``model_func`` returns.
    """
    # Transpose the rows into two columns and hand each over as a list.
    xs, ys = map(list, zip(*data))
    return model_func(xs, ys)
|
||||
|
||||
|
||||
def reglinw(data, prepare_data):
    """Fit a linear-in-parameters model through the normal equations.

    The coefficients are computed as ``A = (X^T X)^-1 X^T Y``.

    Args:
        data: Raw rows, passed unchanged to ``prepare_data``.
        prepare_data: Callable returning ``(X, Y)``: the design matrix and
            the column vector of observed values.

    Returns:
        Tuple ``(Y, Z, param_str)`` where ``Z = X A`` holds the fitted values
        and ``param_str`` has one ``"<letter> = <value>"`` line per
        coefficient, lettered from ``a``.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` cannot be inverted.
    """
    X, Y = prepare_data(data)
    XT = np.transpose(X)
    XTX = np.matmul(XT, X)
    try:
        inv_XTX = np.linalg.inv(XTX)
    except np.linalg.LinAlgError:
        print("XTX is not inversible")
        raise
    A = np.matmul(np.matmul(inv_XTX, XT), Y)
    Z = np.matmul(X, A)

    params = [row[0] for row in A]
    # Move the first coefficient to the end before lettering. The model
    # functions in this file put the constant column first, while their
    # formulas list the constant term last.
    params = params[1:] + params[:1]
    param_str = [
        f'{chr(ord("a") + i)} = {value}' for i, value in enumerate(params)
    ]
    return Y, Z, '\n'.join(param_str)
|
||||
|
||||
|
||||
def model_func1(lst_x, lst_y):
    """Fit ``f(X) = aX`` with ``a = mean(x*y) / mean(x**2)``.

    Args:
        lst_x: List of x values.
        lst_y: List of y values, indexed in step with ``lst_x``.

    Returns:
        Tuple ``(Y, Z, param_str)``: the observed values and the fitted
        values ``a*x`` as one-column arrays, and the text ``"a = <value>"``.
    """
    mean_xy = mean([lst_y[i] * x for i, x in enumerate(lst_x)])
    mean_xx = mean([x**2 for x in lst_x])
    a = mean_xy / mean_xx

    observed = np.array([[y] for y in lst_y])
    fitted = np.array([[a * x] for x in lst_x])
    return observed, fitted, f'a = {a}'
|
||||
|
||||
|
||||
def model_func2(lst_x, lst_y):
    """Fit ``f(X) = aX + b``.

    The slope is ``cov(lst_x, lst_y) / var(lst_x)`` (this module's ``cov``
    and ``var`` helpers) and the intercept is ``mean(lst_y) - a*mean(lst_x)``.

    Args:
        lst_x: List of x values.
        lst_y: List of y values.

    Returns:
        Tuple ``(Y, Z, param_str)``: the observed values and the fitted
        values ``a*x + b`` as one-column arrays, and the two-line text
        ``"a = <value>\\nb = <value>"``.
    """
    a = cov(lst_x, lst_y) / var(lst_x)
    b = mean(lst_y) - a * mean(lst_x)
    # No printing here: the parameters are returned as text and the caller
    # decides how to report them, as with model_func1.
    Y = np.array([[y] for y in lst_y])
    Z = np.array([[a * x + b] for x in lst_x])
    return Y, Z, f'a = {a}\nb = {b}'
|
||||
|
||||
|
||||
def model_func3(data):
    """Build the regression matrices for ``f(X) = aX^2 + b*sin(X) + c``.

    Args:
        data: Sequence of ``(x, y)`` rows.

    Returns:
        Tuple ``(X, Y)``: ``X`` has one row ``[1.0, x**2, sin(x)]`` per data
        row, ``Y`` is the one-column array of the y values.
    """
    design = np.array([[1.0, x**2, sin(x)] for x, _ in data])
    target = np.array([[y] for _, y in data])
    return design, target
|
||||
|
||||
|
||||
def model_func4(data):
    """Build the regression matrices for ``f(X1, X2) = aX1 + bX2 + c``.

    Args:
        data: Sequence of ``(x1, x2, y)`` rows.

    Returns:
        Tuple ``(X, Y)``: ``X`` has one row ``[1.0, x1, x2]`` per data row,
        ``Y`` is the one-column array of the y values.
    """
    design = np.array([[1.0, x1, x2] for x1, x2, _ in data])
    target = np.array([[y] for _, _, y in data])
    return design, target
|
||||
|
||||
|
||||
def model_func5(data):
    """Build the regression matrices for a full quadratic in two variables.

    Args:
        data: Sequence of ``(x1, x2, y)`` rows.

    Returns:
        Tuple ``(X, Y)``: ``X`` has one row
        ``[1.0, x1**2, x1*x2, x2**2, x1, x2]`` per data row, ``Y`` is the
        one-column array of the y values.
    """
    design = np.array(
        [[1.0, x1**2, x1*x2, x2**2, x1, x2] for x1, x2, _ in data]
    )
    target = np.array([[y] for _, _, y in data])
    return design, target
|
||||
|
||||
|
||||
# Model table. Each entry is (model_func, use_reglinw, func_str), the layout
# that reglin unpacks:
#   model_func  - fitting function (use_reglinw False) or matrix builder
#                 (use_reglinw True)
#   use_reglinw - True to fit through reglinw, False to fit through reglinp
#   func_str    - formula text shown in the printed report and plot titles
MODELS = [
    # Single-variable models.
    (model_func1, False, '$f(X) = aX$'),
    (model_func2, False, '$f(X) = aX + b$'),
    (model_func3, True, '$f(X) = aX^2 + bsin(X) + c$'),
    # Two-variable models.
    (model_func4, True, '$f(X_1, X_2) = aX_1 + bX_2 + c$'),
    (
        model_func5,
        True,
        r'$f(X_1, X_2) = a{X_1}^2 + bX_1 X_2 + c{X_2}^2 +dX_1 +eX_2 +f$',
    ),
]
|
||||
|
||||
|
||||
def main():
    """Load the four CSV data sets, print diagnostics and run every model.

    The first three entries of ``MODELS`` are run on ``data1`` and ``data2``;
    the remaining two are run on ``data3`` and ``data4``.
    """
    data1, data2, data3, data4 = load_data(
        'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')

    # Diagnostic output for data1.
    print(var([x for x, _ in data1]))
    print(cov([x for x, _ in data1], [y for _, y in data1]))
    model1(data1)
    model1(data2)
    model2(data1)
    model2(data2)
    xy = sum(x*y for x, y in data1)
    x_2 = sum(x**2 for x, _ in data1)
    print(sum(2*x - 2*x*y for x, y in data1) / len(data1))
    print(xy / x_2)

    for model in MODELS[:3]:
        reglin(data1, 'data1.csv', model)
        reglin(data2, 'data2.csv', model)
    for model in MODELS[3:5]:
        reglin(data3, 'data3.csv', model)
        reglin(data4, 'data4.csv', model)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user