Zad2 skończone

This commit is contained in:
2021-12-20 09:37:15 +01:00
parent 2efbd423ad
commit fc5a5d8599
25 changed files with 352 additions and 25 deletions
+2
View File
@@ -1 +1,3 @@
*.pdf filter=lfs diff=lfs merge=lfs -text *.pdf filter=lfs diff=lfs merge=lfs -text
*.odt filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
+153 -1
View File
@@ -1 +1,153 @@
*.zip *.zip
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+131 -24
View File
@@ -3,7 +3,11 @@ Komputerowa analiza danych
Zadanie 2 Zadanie 2
Michał Leśniak 195642 Michał Leśniak 195642
""" """
from math import sin
from statistics import mean from statistics import mean
import matplotlib.pyplot as plt
from chi2_normality import chi2normality_describe
import numpy as np
def var(lst): def var(lst):
@@ -30,41 +34,144 @@ def load_data(*args):
return ret return ret
def reglin(data, name, model):
    """Fit one regression model to ``data``, print a report and plot the fit.

    Args:
        data: Indexable sequence of rows. Rows with 2 elements are unpacked
            as ``(x, y)`` and drawn in 2-D; rows with 3 elements are unpacked
            as ``(x1, x2, y)`` and drawn in 3-D.
        name: Label of the data set; used in the printed report and in the
            plot titles.
        model: ``(model_func, use_reglinw, func_str)`` tuple. When
            ``use_reglinw`` is true ``model_func`` is handed to ``reglinw``,
            otherwise to ``reglinp``. ``func_str`` is the formula text that
            is printed and put into the plot titles.

    Raises:
        RuntimeError: If the rows have neither 2 nor 3 elements. The textual
            report has already been printed when this is raised.
    """
    model_func, use_reglinw, func_str = model
    if use_reglinw:
        Y, Z, param_str = reglinw(data, model_func)
    else:
        Y, Z, param_str = reglinp(data, model_func)

    # np.transpose(...)[0] takes the first column of each array as a list.
    err = Y - Z
    lst_err = np.transpose(err)[0].tolist()
    lst_y = np.transpose(Y)[0].tolist()
    lst_z = np.transpose(Z)[0].tolist()

    # Fit quality: mean squared error, largest absolute error and
    # r2 = 1 - Var(err) / Var(Y).
    mse = mean([x**2 for x in lst_err])
    md = max(abs(x) for x in lst_err)
    var_err = var(lst_err)
    var_y = var(lst_y)
    r2 = 1 - (var_err / var_y)

    if len(data[0]) > 2:
        print(f'Regresja liniowa wielu zmiennych dla {name}:')
    else:
        print(f'Prosta regresja liniowa jednej zmiennej dla {name}:')
    print(func_str)
    print(param_str)
    print(f'MSE={mse}')
    print(f'maxD={md}')
    print(f'VarErr<=VarY - {var_err<=var_y}')
    print(f'r2={r2}')
    chi2normality_describe(lst_err)

    if len(data[0]) == 2:  # print 2D
        lst_x, lst_y = zip(*data)
        lst_x = list(lst_x)
        lst_y = list(lst_y)
        plt.figure(1)
        ax = plt.axes()
        ax.scatter(lst_x, lst_y)
        ax.plot(lst_x, lst_z, 'r-')
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        plt.grid(True)
    elif len(data[0]) == 3:
        lst_x1, lst_x2, lst_y = zip(*data)
        lst_x1 = list(lst_x1)
        lst_x2 = list(lst_x2)
        lst_y = list(lst_y)
        plt.figure(1)
        ax = plt.axes(projection='3d')
        ax.scatter(lst_x1, lst_x2, lst_y)
        # Fitted values are drawn as red points over the observations.
        ax.scatter(lst_x1, lst_x2, lst_z, color='r')
        ax.set_xlabel('X1')
        ax.set_ylabel('X2')
        ax.set_zlabel('Y')
    else:
        raise RuntimeError(
            f'cannot plot rows with {len(data[0])} elements; expected 2 or 3')
    plt.title(f'{name}\n{func_str}')

    # Second figure: histogram of the errors in 50 bins.
    plt.figure(2)
    plt.hist(err, 50)
    plt.xlabel('Err')
    plt.title(f'Histogram Err dla {name}\n{func_str}')
    plt.grid(True)

    plt.show()
def reglinp(data, model_func):
    """Split ``(x, y)`` rows into two lists and delegate to ``model_func``.

    Args:
        data: Iterable of two-element rows.
        model_func: Callable invoked as ``model_func(xs, ys)`` with the list
            of first elements and the list of second elements.

    Returns:
        Whatever ``model_func`` returns.
    """
    xs, ys = (list(column) for column in zip(*data))
    return model_func(xs, ys)
def reglinw(data, prepare_data):
    """Fit a linear model by least squares using the normal equations.

    The coefficients are computed as ``A = (X^T X)^-1 X^T Y`` and the fitted
    values as ``Z = X A``.

    Args:
        data: Raw observations; passed unchanged to ``prepare_data``.
        prepare_data: Callable returning ``(X, Y)``. ``Y`` is expected to be
            2-D with the observed values in its first column, because the
            first element of every row of the solution is read below.

    Returns:
        ``(Y, Z, param_str)`` where ``param_str`` has one ``"<letter> = <value>"``
        line per coefficient, lettered from ``a``. The coefficient of the
        first column of ``X`` is listed last.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` cannot be inverted; a message
            is printed before the error is re-raised.
    """
    X, Y = prepare_data(data)
    XT = np.transpose(X)
    XTX = np.matmul(XT, X)
    try:
        inv_XTX = np.linalg.inv(XTX)
    except np.linalg.LinAlgError:
        print("XTX is not inversible")
        raise
    A = np.matmul(np.matmul(inv_XTX, XT), Y)
    Z = np.matmul(X, A)

    params = [a[0] for a in A]
    # Rotate so the coefficient of the first column of X is reported last.
    params = params[1:] + params[:1]
    param_str = [
        f'{chr(ord("a") + i)} = {value}' for i, value in enumerate(params)
    ]
    return Y, Z, '\n'.join(param_str)
def model_func1(lst_x, lst_y):
    """Fit ``f(X) = aX`` with ``a = mean(x*y) / mean(x^2)``.

    Args:
        lst_x: List of explanatory values.
        lst_y: List of observed values, indexed in step with ``lst_x``.

    Returns:
        ``(Y, Z, param_str)``: the observed values and the fitted values
        ``a*x`` as one-column arrays, and the text ``"a = <value>"``.
    """
    products = [lst_y[i] * x for i, x in enumerate(lst_x)]
    squares = [x**2 for x in lst_x]
    a = mean(products) / mean(squares)

    observed = np.array([[y] for y in lst_y])
    fitted = np.array([[a * x] for x in lst_x])
    return observed, fitted, f'a = {a}'
def model_func2(lst_x, lst_y):
    """Fit ``f(X) = aX + b`` from the covariance and variance of the data.

    The slope is ``cov(x, y) / var(x)`` and the intercept is
    ``mean(y) - a * mean(x)``.

    Args:
        lst_x: List of explanatory values.
        lst_y: List of observed values.

    Returns:
        ``(Y, Z, param_str)``: the observed values and the fitted values
        ``a*x + b`` as one-column arrays, and the text
        ``"a = <value>\\nb = <value>"``.
    """
    slope = cov(lst_x, lst_y) / var(lst_x)
    intercept = mean(lst_y) - slope * mean(lst_x)

    observed = np.array([[y] for y in lst_y])
    fitted = np.array([[slope * x + intercept] for x in lst_x])
    return observed, fitted, f'a = {slope}\nb = {intercept}'
def model_func3(data):
    """Build the matrices for a fit on the terms ``1``, ``x^2`` and ``sin(x)``.

    Args:
        data: Sequence of ``(x, y)`` rows.

    Returns:
        ``(X, Y)``: ``X`` has one row ``[1.0, x**2, sin(x)]`` per
        observation, ``Y`` has one row ``[y]`` per observation.
    """
    design_rows = [[1.0, x**2, sin(x)] for x, _ in data]
    target_rows = [[y] for _, y in data]
    return np.array(design_rows), np.array(target_rows)
def model_func4(data):
    """Build the matrices for a fit on the terms ``1``, ``x1`` and ``x2``.

    Args:
        data: Sequence of ``(x1, x2, y)`` rows.

    Returns:
        ``(X, Y)``: ``X`` has one row ``[1.0, x1, x2]`` per observation,
        ``Y`` has one row ``[y]`` per observation.
    """
    design_rows = [[1.0, x1, x2] for x1, x2, _ in data]
    target_rows = [[y] for _, _, y in data]
    return np.array(design_rows), np.array(target_rows)
def model_func5(data):
    """Build the matrices for a full second-degree fit in ``x1`` and ``x2``.

    Args:
        data: Sequence of ``(x1, x2, y)`` rows.

    Returns:
        ``(X, Y)``: ``X`` has one row
        ``[1.0, x1**2, x1*x2, x2**2, x1, x2]`` per observation, ``Y`` has one
        row ``[y]`` per observation.
    """
    design_rows = [
        [1.0, x1**2, x1 * x2, x2**2, x1, x2] for x1, x2, _ in data
    ]
    target_rows = [[y] for _, _, y in data]
    return np.array(design_rows), np.array(target_rows)
# Regression models evaluated by this script. Each entry is a
# (model function, use_reglinw flag, formula text) tuple, which is the shape
# that reglin() unpacks. The formula text is shown in reports and plot titles.
MODELS = [
    # Models fitted through reglinp (flag False).
    (model_func1, False, '$f(X) = aX$'),
    (model_func2, False, '$f(X) = aX + b$'),
    # Models fitted through reglinw (flag True).
    (model_func3, True, '$f(X) = aX^2 + bsin(X) + c$'),
    (model_func4, True, '$f(X_1, X_2) = aX_1 + bX_2 + c$'),
    (
        model_func5,
        True,
        r'$f(X_1, X_2) = a{X_1}^2 + bX_1 X_2 + c{X_2}^2 +dX_1 +eX_2 +f$',
    ),
]
def main():
    """Load the four data sets and run every matching model on them.

    Models 0-2 of ``MODELS`` are run on ``data1.csv`` and ``data2.csv``;
    models 3-4 are run on ``data3.csv`` and ``data4.csv``.
    """
    data1, data2, data3, data4 = load_data(
        'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')

    first_pair = ((data1, 'data1.csv'), (data2, 'data2.csv'))
    second_pair = ((data3, 'data3.csv'), (data4, 'data4.csv'))

    for model_index in range(3):
        for dataset, label in first_pair:
            reglin(dataset, label, MODELS[model_index])
    for model_index in range(3, 5):
        for dataset, label in second_pair:
            reglin(dataset, label, MODELS[model_index])
if __name__ == '__main__': if __name__ == '__main__':