Zad2 skończone

2021-12-20 09:37:15 +01:00
parent 2efbd423ad
commit fc5a5d8599
25 changed files with 352 additions and 25 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,3 @@
 *.pdf filter=lfs diff=lfs merge=lfs -text
 *.odt filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,153 @@
-*.zip
+*.zip
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
--- a/zad2/ml_195642_zad2.odt
+++ b/zad2/ml_195642_zad2.odt
--- a/zad2/ml_195642_zad2.pdf
+++ b/zad2/ml_195642_zad2.pdf
--- a/zad2/wykresy/data1_m1.png
+++ b/zad2/wykresy/data1_m1.png
--- a/zad2/wykresy/data1_m1_h.png
+++ b/zad2/wykresy/data1_m1_h.png
--- a/zad2/wykresy/data1_m2.png
+++ b/zad2/wykresy/data1_m2.png
--- a/zad2/wykresy/data1_m2_h.png
+++ b/zad2/wykresy/data1_m2_h.png
--- a/zad2/wykresy/data1_m3.png
+++ b/zad2/wykresy/data1_m3.png
--- a/zad2/wykresy/data1_m3_h.png
+++ b/zad2/wykresy/data1_m3_h.png
--- a/zad2/wykresy/data2_m1.png
+++ b/zad2/wykresy/data2_m1.png
--- a/zad2/wykresy/data2_m1_h.png
+++ b/zad2/wykresy/data2_m1_h.png
--- a/zad2/wykresy/data2_m2.png
+++ b/zad2/wykresy/data2_m2.png
--- a/zad2/wykresy/data2_m2_h.png
+++ b/zad2/wykresy/data2_m2_h.png
--- a/zad2/wykresy/data2_m3.png
+++ b/zad2/wykresy/data2_m3.png
--- a/zad2/wykresy/data2_m3_h.png
+++ b/zad2/wykresy/data2_m3_h.png
--- a/zad2/wykresy/data3_m4.png
+++ b/zad2/wykresy/data3_m4.png
--- a/zad2/wykresy/data3_m4_h.png
+++ b/zad2/wykresy/data3_m4_h.png
--- a/zad2/wykresy/data3_m5.png
+++ b/zad2/wykresy/data3_m5.png
--- a/zad2/wykresy/data3_m5_h.png
+++ b/zad2/wykresy/data3_m5_h.png
--- a/zad2/wykresy/data4_m4.png
+++ b/zad2/wykresy/data4_m4.png
--- a/zad2/wykresy/data4_m4_h.png
+++ b/zad2/wykresy/data4_m4_h.png
--- a/zad2/wykresy/data4_m5.png
+++ b/zad2/wykresy/data4_m5.png
--- a/zad2/wykresy/data4_m5_h.png
+++ b/zad2/wykresy/data4_m5_h.png
--- a/zad2/zad2.py
+++ b/zad2/zad2.py
@@ -3,7 +3,11 @@ Komputerowa analiza danych
 Zadanie 2
 Michał Leśniak 195642
 """
 from math import sin
 from statistics import mean
 import matplotlib.pyplot as plt
 from chi2_normality import chi2normality_describe
 import numpy as np
 def var(lst):
@@ -30,41 +34,144 @@ def load_data(*args):
    return ret
-def model1(data):
+def reglin(data, name, model):
-    lst_x = [x for x, _ in data]
+    model_func, use_reglinw, func_str = model
-    lst_y = [y for _, y in data]
+    if use_reglinw:
        Y, Z, param_str = reglinw(data, model_func)
    else:
        Y, Z, param_str = reglinp(data, model_func)
-    a = cov(lst_x, lst_y)/var(lst_x)
+    err = Y-Z
-    print(f'f(X) = {a} * X')
+    lst_err = np.transpose(err)[0].tolist()
    lst_y = np.transpose(Y)[0].tolist()
    lst_z = np.transpose(Z)[0].tolist()
    mse = mean([x**2 for x in lst_err])
    md = max([abs(x) for x in lst_err])
    var_err = var(lst_err)
    var_y = var(lst_y)
    r2 = 1-(var_err/var_y)
    if len(data[0]) > 2:
        print(f'Regresja liniowa wielu zmiennych dla {name}:')
    else:
        print(f'Prosta regresja liniowa jednej zmiennej dla {name}:')
    print(func_str)
    print(param_str)
    print(f'MSE={mse}')
    print(f'maxD={md}')
    print(f'VarErr<=VarY - {var_err<=var_y}')
    print(f'r2={r2}')
    chi2normality_describe(lst_err)
    lst_z = np.transpose(Z)[0].tolist()
    if len(data[0]) == 2:  # print 2D
        lst_x, lst_y = zip(*data)
        lst_x = list(lst_x)
        lst_y = list(lst_y)
        plt.figure(1)
        ax = plt.axes()
        ax.scatter(lst_x, lst_y)
        ax.plot(lst_x, lst_z, 'r-')
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        plt.grid(True)
    elif len(data[0]) == 3:
        lst_x1, lst_x2, lst_y = zip(*data)
        lst_x1 = list(lst_x1)
        lst_x2 = list(lst_x2)
        lst_y = list(lst_y)
        plt.figure(1)
        ax = plt.axes(projection='3d')
        ax.scatter(lst_x1, lst_x2, lst_y)
        ax.scatter(lst_x1, lst_x2, lst_z, color='r')
        ax.set_xlabel('X1')
        ax.set_ylabel('X2')
        ax.set_zlabel('Y')
    else:
        raise RuntimeError
    plt.title(f'{name}\n{func_str}')
    plt.figure(2)
    plt.hist(err, 50)
    plt.xlabel('Err')
    plt.title(f'Histogram Err dla {name}\n{func_str}')
    plt.grid(True)
    plt.show()
-def model2(data):
+def reglinp(data, model_func):
-    lst_x = [x for x, _ in data]
+    lst_x, lst_y = zip(*data)
-    lst_y = [y for _, y in data]
+    lst_x = list(lst_x)
    lst_y = list(lst_y)
    return model_func(lst_x, lst_y)
 def reglinw(data, prepare_data):
    """Fit a linear model by least squares using the normal equations.

    ``prepare_data(data)`` must return a pair ``(X, Y)``. The coefficients
    are computed as ``A = inv(X^T X) X^T Y`` and the fitted values as
    ``Z = X A``.

    Returns a tuple ``(Y, Z, text)`` where ``text`` lists the coefficients
    one per line, labelled ``a``, ``b``, ``c``, ... The first coefficient
    is moved to the end of the list before labelling.

    Raises ``np.linalg.LinAlgError`` (after printing a message) when
    ``X^T X`` cannot be inverted.
    """
    X, Y = prepare_data(data)
    XT = np.transpose(X)
    XTX = np.matmul(XT, X)
    try:
        inv_XTX = np.linalg.inv(XTX)
    except np.linalg.LinAlgError:
        print("XTX is not inversible")
        raise
    A = np.matmul(np.matmul(inv_XTX, XT), Y)
    Z = np.matmul(X, A)
    # Takes the first entry of every row of A, i.e. assumes Y (and hence A)
    # has a single column -- TODO confirm against the prepare_data callers.
    params = [row[0] for row in A]
    # Rotate the leading coefficient to the end; presumably it is the
    # constant term, which the model formulas in this file list last.
    params = params[1:] + params[:1]
    param_str = [
        f'{chr(ord("a")+i)} = {value}' for i, value in enumerate(params)
    ]
    return Y, Z, '\n'.join(param_str)
 def model_func1(lst_x, lst_y):
    """Fit the intercept-free line f(X) = aX and evaluate it on ``lst_x``.

    The slope is ``mean(y*x) / mean(x**2)``.

    Returns a tuple ``(Y, Z, text)``: ``Y`` holds the given y values and
    ``Z`` the fitted values ``a*x``, each as a one-column array; ``text``
    is the string ``'a = <slope>'``.
    """
    products = [lst_y[i]*x for i, x in enumerate(lst_x)]
    squares = [x**2 for x in lst_x]
    a = mean(products) / mean(squares)
    observed = np.array([[y] for y in lst_y])
    fitted = np.array([[a*x] for x in lst_x])
    return observed, fitted, f'a = {a}'
 def model_func2(lst_x, lst_y):
    a = cov(lst_x, lst_y)/var(lst_x)
    b = mean(lst_y) - a*mean(lst_x)
-    print(f'f(X) = {a} * X + {b}')
+    Y = np.array([list((y,)) for y in lst_y])
    Z = np.array([list((a*x+b,))for x in lst_x])
    return Y, Z, f'a = {a}\nb = {b}'
 def model_func3(data):
    """Build the matrices for a model with terms 1, x**2 and sin(x).

    ``data`` is iterated as ``(x, y)`` pairs. Returns ``(X, Y)`` where
    each row of ``X`` is ``[1.0, x**2, sin(x)]`` and each row of ``Y``
    is ``[y]``.
    """
    design = np.array([[1.0, x**2, sin(x)] for x, _ in data])
    targets = np.array([[y] for _, y in data])
    return design, targets
 def model_func4(data):
    """Build the matrices for a model with terms 1, x1 and x2.

    ``data`` is iterated as ``(x1, x2, y)`` triples. Returns ``(X, Y)``
    where each row of ``X`` is ``[1.0, x1, x2]`` and each row of ``Y``
    is ``[y]``.
    """
    design = np.array([[1.0, x1, x2] for x1, x2, _ in data])
    targets = np.array([[y] for _, _, y in data])
    return design, targets
 def model_func5(data):
    """Build the matrices for a full second-order model in x1 and x2.

    ``data`` is iterated as ``(x1, x2, y)`` triples. Returns ``(X, Y)``
    where each row of ``X`` is ``[1.0, x1**2, x1*x2, x2**2, x1, x2]`` and
    each row of ``Y`` is ``[y]``.
    """
    design = np.array(
        [[1.0, x1**2, x1*x2, x2**2, x1, x2] for x1, x2, _ in data]
    )
    targets = np.array([[y] for _, _, y in data])
    return design, targets
 # Model table. Each entry is a triple that reglin() unpacks as
 # (model_func, use_reglinw, func_str): the function handed to the fitter,
 # a flag choosing reglinw (True) over reglinp (False), and the formula
 # text that is printed and used in plot titles.
 MODELS = [
    (model_func1, False, '$f(X) = aX$'),
    (model_func2, False, '$f(X) = aX + b$'),
    (model_func3, True, '$f(X) = aX^2 + bsin(X) + c$'),
    (model_func4, True, '$f(X_1, X_2) = aX_1 + bX_2 + c$'),
    (
        model_func5,
        True,
        r'$f(X_1, X_2) = a{X_1}^2 + bX_1 X_2 + c{X_2}^2 +dX_1 +eX_2 +f$',
    ),
 ]
 def main():
    data1, data2, data3, data4 = load_data(
        'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')
-    print(var([x for x, _ in data1]))
+    for i in range(3):
-    print(cov([x for x, _ in data1], [y for _, y in data1]))
+        reglin(data1, 'data1.csv', MODELS[i])
-    # print(data2)
+        reglin(data2, 'data2.csv', MODELS[i])
-    # print(data3)
+    for i in range(3, 5):
-    # print(data4)
+        reglin(data3, 'data3.csv', MODELS[i])
-    model1(data1)
+        reglin(data4, 'data4.csv', MODELS[i])
    model1(data2)
    model2(data1)
    model2(data2)
    x_mean = mean([x for x, _ in data1])
    y_mean = mean([y for _, y in data1])
    xy = sum([x*y for x, y in data1])
    x_2 = sum([x**2 for x, _ in data1])
    print(sum([2*x-2*x*y for x,y in data1])/len(data1))
    print(xy/x_2)
 if __name__ == '__main__':