diff --git a/.gitattributes b/.gitattributes index b634d85..b00b518 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ *.pdf filter=lfs diff=lfs merge=lfs -text +*.odt filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 6f66c74..95a7919 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,153 @@ -*.zip \ No newline at end of file +*.zip +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ \ No newline at end of file diff --git a/zad2/ml_195642_zad2.odt b/zad2/ml_195642_zad2.odt new file mode 100644 index 0000000..b1b0004 --- /dev/null +++ b/zad2/ml_195642_zad2.odt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7a4a417d8c0c27e15b86dc40de94e044ee3152320f923b2021c155c355a633 +size 708612 diff --git a/zad2/ml_195642_zad2.pdf b/zad2/ml_195642_zad2.pdf new file mode 100644 index 0000000..5c7d643 --- /dev/null +++ b/zad2/ml_195642_zad2.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48bd356d3f9958a702b3ad7ec794b284ca715e393331a4e15a0a1244a2283a22 +size 556606 diff --git a/zad2/wykresy/data1_m1.png b/zad2/wykresy/data1_m1.png new file mode 100644 index 0000000..59707cb --- /dev/null +++ b/zad2/wykresy/data1_m1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b572989e61fb7680f3b11429aac0b612275cdc922ac06646e00328a054382457 +size 25702 diff --git a/zad2/wykresy/data1_m1_h.png b/zad2/wykresy/data1_m1_h.png new file mode 100644 index 0000000..eb98043 --- /dev/null +++ b/zad2/wykresy/data1_m1_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cec84e878a94fa0d5e56687e2c63335eae1634caff2bd63dcde4c42005bc151 +size 15829 diff --git a/zad2/wykresy/data1_m2.png b/zad2/wykresy/data1_m2.png new file mode 100644 index 0000000..5936e4d --- /dev/null +++ b/zad2/wykresy/data1_m2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ecbf5d74fb793b328230424814081d3054aae6caab32e50dc097a80a34ed3a9 +size 25338 diff --git a/zad2/wykresy/data1_m2_h.png b/zad2/wykresy/data1_m2_h.png new file mode 100644 index 0000000..f5d0635 --- /dev/null +++ b/zad2/wykresy/data1_m2_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ef348e377b9f096999189996f27b1578345ae5b061df9e0e0d0912ecf8ae87 +size 14184 diff --git a/zad2/wykresy/data1_m3.png b/zad2/wykresy/data1_m3.png new file mode 100644 index 0000000..1f0c4bd --- /dev/null 
+++ b/zad2/wykresy/data1_m3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37955146606d7c4473cf283bb1e93579c883cf3206d1311a49340c42495c96c2 +size 25575 diff --git a/zad2/wykresy/data1_m3_h.png b/zad2/wykresy/data1_m3_h.png new file mode 100644 index 0000000..90ae7d1 --- /dev/null +++ b/zad2/wykresy/data1_m3_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b5a801ce88bda132a4c9b264cd2a19894180a9454a1a97fd2576d8c54df5f24 +size 14458 diff --git a/zad2/wykresy/data2_m1.png b/zad2/wykresy/data2_m1.png new file mode 100644 index 0000000..b0e7e4b --- /dev/null +++ b/zad2/wykresy/data2_m1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93966db0b3fbfcbb6a0cf2d2929e9c03e01f09e1012e64f6190484ac66765a04 +size 22737 diff --git a/zad2/wykresy/data2_m1_h.png b/zad2/wykresy/data2_m1_h.png new file mode 100644 index 0000000..9e3e1c1 --- /dev/null +++ b/zad2/wykresy/data2_m1_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8440cca4c6b03d8d38d9558fb40ada422d7dd0ea8904c503c1b492a186bda84a +size 15713 diff --git a/zad2/wykresy/data2_m2.png b/zad2/wykresy/data2_m2.png new file mode 100644 index 0000000..885b7d1 --- /dev/null +++ b/zad2/wykresy/data2_m2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26de1fe90cef11931329cb284a1158869927d516609c79de2438dcfef3611b9 +size 23292 diff --git a/zad2/wykresy/data2_m2_h.png b/zad2/wykresy/data2_m2_h.png new file mode 100644 index 0000000..eabcfc6 --- /dev/null +++ b/zad2/wykresy/data2_m2_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e13eb1375befaa76c53fc5ba6a5e300b5c0b24843648aa49d73a0c0f6440456 +size 14372 diff --git a/zad2/wykresy/data2_m3.png b/zad2/wykresy/data2_m3.png new file mode 100644 index 0000000..198a529 --- /dev/null +++ b/zad2/wykresy/data2_m3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ee2343d37a954d226c26d13fc2d79ad6fc5d629e7c22c75250f7cbe6f9390a5b +size 29874 diff --git a/zad2/wykresy/data2_m3_h.png b/zad2/wykresy/data2_m3_h.png new file mode 100644 index 0000000..b341028 --- /dev/null +++ b/zad2/wykresy/data2_m3_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36036f818d03229becab3fe97d637bf872d0dbe673bd541e6ca08bb9f574bdf6 +size 17357 diff --git a/zad2/wykresy/data3_m4.png b/zad2/wykresy/data3_m4.png new file mode 100644 index 0000000..28b817f --- /dev/null +++ b/zad2/wykresy/data3_m4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d43d86e2d69e8f54745c820f220caf1304acc22b7f8137d38c6b4329ca1d580 +size 67688 diff --git a/zad2/wykresy/data3_m4_h.png b/zad2/wykresy/data3_m4_h.png new file mode 100644 index 0000000..ef85f72 --- /dev/null +++ b/zad2/wykresy/data3_m4_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64efa3a136840e5f2cbd7fe55dfbb83e22fc54a52be70c3a997d3cb3fb0d9482 +size 14481 diff --git a/zad2/wykresy/data3_m5.png b/zad2/wykresy/data3_m5.png new file mode 100644 index 0000000..49de4c7 --- /dev/null +++ b/zad2/wykresy/data3_m5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4855f544d6e1e309af7d15466ec701f401464e454320fcb707089bfedf5f116 +size 64273 diff --git a/zad2/wykresy/data3_m5_h.png b/zad2/wykresy/data3_m5_h.png new file mode 100644 index 0000000..d1eca92 --- /dev/null +++ b/zad2/wykresy/data3_m5_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddcc7212e751bc65195e3be4c7635a329f83ef9eedfa410b5262c97d6b3e0f96 +size 16526 diff --git a/zad2/wykresy/data4_m4.png b/zad2/wykresy/data4_m4.png new file mode 100644 index 0000000..cb8a943 --- /dev/null +++ b/zad2/wykresy/data4_m4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581d555ebea0ba5240419baaefb8534978d60014f0a733e237d0b3fe4e1aba35 +size 67235 diff --git a/zad2/wykresy/data4_m4_h.png 
b/zad2/wykresy/data4_m4_h.png new file mode 100644 index 0000000..cf33462 --- /dev/null +++ b/zad2/wykresy/data4_m4_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1ee99b31be679801d8d06ebc1297a0020bc724e4004521ad6fe23630c9d43f +size 14058 diff --git a/zad2/wykresy/data4_m5.png b/zad2/wykresy/data4_m5.png new file mode 100644 index 0000000..3ff72f3 --- /dev/null +++ b/zad2/wykresy/data4_m5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cddf6b7fdbb20d138102d4502f589a89e4e00c4611387d7a35449b4af41db6c8 +size 72850 diff --git a/zad2/wykresy/data4_m5_h.png b/zad2/wykresy/data4_m5_h.png new file mode 100644 index 0000000..ac3f2fb --- /dev/null +++ b/zad2/wykresy/data4_m5_h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caa145014bf102735fa7d2e29cc3bc43348c835690a41b96513efca4d9edce91 +size 16170 diff --git a/zad2/zad2.py b/zad2/zad2.py index 25bae13..81251aa 100644 --- a/zad2/zad2.py +++ b/zad2/zad2.py @@ -3,7 +3,11 @@ Komputerowa analiza danych Zadanie 2 Michał Leśniak 195642 """ +from math import sin from statistics import mean +import matplotlib.pyplot as plt +from chi2_normality import chi2normality_describe +import numpy as np def var(lst): @@ -30,41 +34,144 @@ def load_data(*args): return ret -def model1(data): - lst_x = [x for x, _ in data] - lst_y = [y for _, y in data] +def reglin(data, name, model): + model_func, use_reglinw, func_str = model + if use_reglinw: + Y, Z, param_str = reglinw(data, model_func) + else: + Y, Z, param_str = reglinp(data, model_func) - a = cov(lst_x, lst_y)/var(lst_x) - print(f'f(X) = {a} * X') + err = Y-Z + lst_err = np.transpose(err)[0].tolist() + lst_y = np.transpose(Y)[0].tolist() + lst_z = np.transpose(Z)[0].tolist() + mse = mean([x**2 for x in lst_err]) + md = max([abs(x) for x in lst_err]) + var_err = var(lst_err) + var_y = var(lst_y) + r2 = 1-(var_err/var_y) + if len(data[0]) > 2: + print(f'Regresja liniowa wielu zmiennych dla {name}:') + 
else: + print(f'Prosta regresja liniowa jednej zmiennej dla {name}:') + print(func_str) + print(param_str) + print(f'MSE={mse}') + print(f'maxD={md}') + print(f'VarErr<=VarY - {var_err<=var_y}') + print(f'r2={r2}') + chi2normality_describe(lst_err) + + lst_z = np.transpose(Z)[0].tolist() + + if len(data[0]) == 2: # print 2D + lst_x, lst_y = zip(*data) + lst_x = list(lst_x) + lst_y = list(lst_y) + plt.figure(1) + ax = plt.axes() + ax.scatter(lst_x, lst_y) + ax.plot(lst_x, lst_z, 'r-') + ax.set_xlabel('X') + ax.set_ylabel('Y') + plt.grid(True) + elif len(data[0]) == 3: + lst_x1, lst_x2, lst_y = zip(*data) + lst_x1 = list(lst_x1) + lst_x2 = list(lst_x2) + lst_y = list(lst_y) + plt.figure(1) + ax = plt.axes(projection='3d') + ax.scatter(lst_x1, lst_x2, lst_y) + ax.scatter(lst_x1, lst_x2, lst_z, color='r') + ax.set_xlabel('X1') + ax.set_ylabel('X2') + ax.set_zlabel('Y') + else: + raise RuntimeError + plt.title(f'{name}\n{func_str}') + plt.figure(2) + plt.hist(err, 50) + plt.xlabel('Err') + plt.title(f'Histogram Err dla {name}\n{func_str}') + plt.grid(True) + plt.show() -def model2(data): - lst_x = [x for x, _ in data] - lst_y = [y for _, y in data] +def reglinp(data, model_func): + lst_x, lst_y = zip(*data) + lst_x = list(lst_x) + lst_y = list(lst_y) + return model_func(lst_x, lst_y) + +def reglinw(data, prepare_data): + X, Y = prepare_data(data) + XT = np.transpose(X) + XTX = np.matmul(XT, X) + try: + inv_XTX = np.linalg.inv(XTX) + except np.linalg.LinAlgError: + print("XTX is not inversible") + raise + A = np.matmul(np.matmul(inv_XTX, XT), Y) + Z = np.matmul(X, A) + + params = [a[0] for a in A] + params = params[1:] + params[:1] + param_str = [] + for i in range(len(params)): + param_str.append(f'{chr(ord("a")+i)} = {params[i]}') + return Y, Z, '\n'.join(param_str) + + +def model_func1(lst_x, lst_y): + a = mean([lst_y[i]*lst_x[i] for i in range(len(lst_x))]) / \ + mean([x**2 for x in lst_x]) + Y = np.array([list((y,)) for y in lst_y]) + Z = np.array([list((a*x,))for x 
in lst_x]) + return Y, Z, f'a = {a}' + + +def model_func2(lst_x, lst_y): a = cov(lst_x, lst_y)/var(lst_x) b = mean(lst_y) - a*mean(lst_x) - print(f'f(X) = {a} * X + {b}') + Y = np.array([list((y,)) for y in lst_y]) + Z = np.array([list((a*x+b,))for x in lst_x]) + return Y, Z, f'a = {a}\nb = {b}' + + +def model_func3(data): + return np.array([list((1.0, x**2, sin(x))) for x, _ in data]), np.array([list((y,)) for _, y in data]) + + +def model_func4(data): + return np.array([list((1.0, x1, x2)) for x1, x2, _ in data]), np.array([list((y,)) for _, _, y in data]) + + +def model_func5(data): + return np.array([list((1.0, x1**2, x1*x2, x2**2, x1, x2)) for x1, x2, _ in data]), np.array([list((y,)) for _, _, y in data]) + + +MODELS = [ + (model_func1, False, '$f(X) = aX$'), + (model_func2, False, '$f(X) = aX + b$'), + (model_func3, True, '$f(X) = aX^2 + bsin(X) + c$'), + (model_func4, True, '$f(X_1, X_2) = aX_1 + bX_2 + c$'), + (model_func5, True, + r'$f(X_1, X_2) = a{X_1}^2 + bX_1 X_2 + c{X_2}^2 +dX_1 +eX_2 +f$') +] def main(): data1, data2, data3, data4 = load_data( 'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv') - print(var([x for x, _ in data1])) - print(cov([x for x, _ in data1], [y for _, y in data1])) - # print(data2) - # print(data3) - # print(data4) - model1(data1) - model1(data2) - model2(data1) - model2(data2) - x_mean = mean([x for x, _ in data1]) - y_mean = mean([y for _, y in data1]) - xy = sum([x*y for x, y in data1]) - x_2 = sum([x**2 for x, _ in data1]) - print(sum([2*x-2*x*y for x,y in data1])/len(data1)) - print(xy/x_2) + for i in range(3): + reglin(data1, 'data1.csv', MODELS[i]) + reglin(data2, 'data2.csv', MODELS[i]) + for i in range(3, 5): + reglin(data3, 'data3.csv', MODELS[i]) + reglin(data4, 'data4.csv', MODELS[i]) if __name__ == '__main__':