Zad 2 wip
This commit is contained in:
parent
3149b56948
commit
2efbd423ad
31
zad2/README.md
Normal file
31
zad2/README.md
Normal file
@ -0,0 +1,31 @@
|
||||
# Zadanie 2: Instrukcja
|
||||
Dla podanych zestawów danych i proponowanych modeli oszacuj wartości parametrów a..f, które minimalizują średni błąd kwadratowy modelowanej funkcji w podanych punktach.
|
||||
|
||||
Dla każdego modelu przedstaw:
|
||||
- sposób przygotowania danych do zastosowania prostej regresji liniowej
|
||||
- wyznaczone wartości parametrów
|
||||
- wykres przedstawiający modelowaną funkcję na tle danych punktów
|
||||
- średni błąd kwadratowy dotyczący wartości funkcji w danych punktach
|
||||
- największą wartość odchylenia wartości funkcji od danych punktów
|
||||
- wartość współczynnika R**2
|
||||
- histogram odchyleń wartości funkcji od danych
|
||||
- (*) test hipotezy statystycznej, że błędy mają rozkład normalny (test chi-kwadrat Pearsona lub test Shapiro-Wilka)
|
||||
- komentarz na temat przydatności zastosowania rozważanego modelu
|
||||
|
||||
|
||||
|
||||
Dla zestawów danych: dane1.csv, dane2.csv (dwie kolumny: X, wartość)
|
||||
Należy rozważyć modele:
|
||||
```
|
||||
f(X) = a * X
|
||||
f(X) = a * X + b
|
||||
f(X) = a * X**2 + b * sin(X) + c
|
||||
```
|
||||
|
||||
Dla zestawów danych: dane3.csv, dane4.csv (trzy kolumny: X1, X2, wartość)
|
||||
Należy rozważyć modele:
|
||||
```
|
||||
f(X1, X2) = a * X1 + b * X2 + c
|
||||
f(X1, X2) = a * X1**2 + b * X1*X2 + c * X2**2 + d * X1 + e * X2 + f
|
||||
```
|
||||
Ostatnia modyfikacja: czwartek, 5 listopada 2020, 14:05
|
159
zad2/chi2_normality.py
Normal file
159
zad2/chi2_normality.py
Normal file
@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import math
|
||||
import random
|
||||
|
||||
# for stats.chi2.cdf (cumulative chi squared distribution)
|
||||
from scipy import stats
|
||||
|
||||
|
||||
# X - list, b - begin (first index), e - end (too big index),
|
||||
# k - index of the element to set in the right place
|
||||
def quickselect(X, b, e, k):
|
||||
# end condition
|
||||
if e - b <= 1: return
|
||||
# random pivot
|
||||
r = random.randint(b, e - 1)
|
||||
X[b], X[r] = X[r], X[b]
|
||||
# partition
|
||||
p0 = b
|
||||
p1 = b + 1
|
||||
while p1 < e:
|
||||
if X[p1] < X[p0]:
|
||||
if p1 == p0 + 1:
|
||||
X[p0], X[p1] = X[p1], X[p0]
|
||||
p0 = p1
|
||||
else:
|
||||
X[p0], X[p0+1], X[p1] = X[p1], X[p0], X[p0+1]
|
||||
p0 += 1
|
||||
p1 += 1
|
||||
# recursion
|
||||
if k < p0:
|
||||
quickselect(X, b, p0, k)
|
||||
elif k > p0:
|
||||
quickselect(X, p0+1, e, k)
|
||||
|
||||
|
||||
|
||||
# quantile calculator (no libraries needed!)
|
||||
def quantile(X, q, copy=True):
|
||||
# edge cases and exceeded range of q
|
||||
if q >= 1.0:
|
||||
return max(X)
|
||||
elif q <= 0.0:
|
||||
return min(X)
|
||||
|
||||
n = len(X)
|
||||
pos = (n - 1) * q # target position
|
||||
left = int(pos) # index of the left element
|
||||
rcoeff = pos - left # coefficient for the right element
|
||||
|
||||
# optional copy
|
||||
if copy:
|
||||
X = X[:]
|
||||
|
||||
quickselect(X, 0, n, left) # quickselect!
|
||||
|
||||
# X[left] is in order now, and elements on the right are greater
|
||||
xleft = X[left] # left element
|
||||
|
||||
if rcoeff == 0.0: # the simple case
|
||||
return xleft
|
||||
else: # the general case
|
||||
xright = min(X[i] for i in range(left+1, n)) # right element
|
||||
return xleft * (1.0 - rcoeff) + xright * rcoeff
|
||||
|
||||
|
||||
# interquantile range
|
||||
def IQR(X, copy=True):
|
||||
if copy:
|
||||
X = X[:]
|
||||
q1 = quantile(X, 0.25, copy=False)
|
||||
q3 = quantile(X, 0.75, copy=False)
|
||||
return q3 - q1
|
||||
|
||||
|
||||
# cumulative distribution function of normal distribution N(mu, sigma)
|
||||
def normal_cdf(mu, sigma, x):
|
||||
return 0.5 * (1.0 + math.erf((x - mu) / (2.0**0.5 * sigma)))
|
||||
|
||||
|
||||
# Chi-squared normality test
|
||||
def chi2normality(X, var_df=1, bins=None):
|
||||
n = len(X)
|
||||
assert n >= 4
|
||||
mu = sum(X) / n # mean
|
||||
evar = sum((x - mu)**2 for x in X) / (n - var_df) # estimated variance
|
||||
sigma = evar**0.5 # the expected distribution will be N(mu, sigma)
|
||||
|
||||
minx = min(X)
|
||||
gap = max(X) - minx
|
||||
assert gap != 0.0
|
||||
if bins is None: # guess the number of bins
|
||||
# see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
|
||||
h = 2.0 * IQR(X) / (n**(1.0/3.0)) # bin size by the Freedman–Diaconis rule
|
||||
k = int(math.ceil(gap / h)) # number of histogram bins
|
||||
else:
|
||||
k = bins
|
||||
k = min(4, k) # k should be at least 4
|
||||
h = gap / k # actual bin size
|
||||
|
||||
# section points between bins
|
||||
points = [minx + h * i for i in range(k + 1)]
|
||||
points[0] = -math.inf
|
||||
points[-1] = math.inf
|
||||
|
||||
# actual frequency
|
||||
freq = [0] * k
|
||||
for x in X:
|
||||
freq[min(int((x - minx) / h), k-1)] += 1
|
||||
|
||||
# cumulative distributions for the section points
|
||||
cdfs = [normal_cdf(mu, sigma, p) for p in points]
|
||||
|
||||
# expected frequencies
|
||||
expected = [(cdfs[i+1] - cdfs[i]) * n for i in range(k)]
|
||||
|
||||
# Chi-squared statistic
|
||||
chi2stat = sum((freq[i] - expected[i])**2 / expected[i] for i in range(k))
|
||||
# Chi-squared degrees of freedom
|
||||
# i.e. number of bins - 1 - number of parameters of of distribution
|
||||
# which is number of bins - 3, as normal distribution has two parameters
|
||||
chi2df = k - 3
|
||||
# p-value for the "X is sampled from a normal distribution" hypothesis
|
||||
# note: for very big chi2stat values, it is close to 0
|
||||
pvalue = 1.0 - stats.chi2.cdf(chi2stat, chi2df)
|
||||
|
||||
# significance (alpha) is a probability, that the hypothesis will be rejected
|
||||
# despite of being actually true
|
||||
|
||||
# when p-value is lower than alpha, the hypothesis is rejected
|
||||
# otherwise, the test fails to reject the hypothesis (this usually happens
|
||||
# when the hypothesis is true)
|
||||
|
||||
return pvalue
|
||||
|
||||
|
||||
# perform the test, print some messages
|
||||
def chi2normality_describe(X, alpha=0.05):
|
||||
print('Hypothesis: X is sampled from a normal distribution')
|
||||
pvalue = chi2normality(X)
|
||||
print('Significance level:', alpha)
|
||||
print('p-value: %.7f' % pvalue)
|
||||
if pvalue < alpha:
|
||||
print('Hypothesis rejected. X doesn\'t seem to be sampled from a normal distribution.')
|
||||
else:
|
||||
print('Failed to reject the hypothesis.')
|
||||
print()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# test for some uniform distribution
|
||||
X = [random.random() * 2.0 + 3.0 for i in range(200)]
|
||||
chi2normality_describe(X)
|
||||
|
||||
# test for actual normal distribution
|
||||
X = [random.normalvariate(4.0, 5.0) for i in range(200)]
|
||||
chi2normality_describe(X)
|
||||
|
100
zad2/data1.csv
Normal file
100
zad2/data1.csv
Normal file
@ -0,0 +1,100 @@
|
||||
0.60, 0.06
|
||||
0.90, 0.21
|
||||
0.93, 0.21
|
||||
1.06, 0.24
|
||||
1.30, 0.29
|
||||
1.52, 0.27
|
||||
1.58, 0.16
|
||||
1.69, 0.34
|
||||
1.75, 0.37
|
||||
1.83, 0.35
|
||||
1.94, 0.37
|
||||
2.00, 0.36
|
||||
2.03, 0.42
|
||||
2.18, 0.46
|
||||
2.22, 0.32
|
||||
2.22, 0.45
|
||||
2.22, 0.47
|
||||
2.22, 0.46
|
||||
2.27, 0.45
|
||||
2.28, 0.41
|
||||
2.29, 0.40
|
||||
2.31, 0.40
|
||||
2.35, 0.42
|
||||
2.38, 0.46
|
||||
2.39, 0.45
|
||||
2.43, 0.51
|
||||
2.53, 0.49
|
||||
2.55, 0.52
|
||||
2.57, 0.50
|
||||
2.60, 0.55
|
||||
2.60, 0.54
|
||||
2.61, 0.46
|
||||
2.62, 0.46
|
||||
2.62, 0.49
|
||||
2.62, 0.51
|
||||
2.62, 0.54
|
||||
2.67, 0.62
|
||||
2.68, 0.51
|
||||
2.72, 0.53
|
||||
2.73, 0.46
|
||||
2.74, 0.60
|
||||
2.74, 0.48
|
||||
2.75, 0.50
|
||||
2.76, 0.59
|
||||
2.79, 0.47
|
||||
2.83, 0.59
|
||||
2.84, 0.45
|
||||
2.84, 0.51
|
||||
2.92, 0.50
|
||||
2.94, 0.54
|
||||
2.96, 0.47
|
||||
2.96, 0.59
|
||||
3.05, 0.48
|
||||
3.08, 0.50
|
||||
3.08, 0.61
|
||||
3.33, 0.61
|
||||
3.33, 0.60
|
||||
3.33, 0.67
|
||||
3.36, 0.63
|
||||
3.36, 0.62
|
||||
3.37, 0.60
|
||||
3.42, 0.57
|
||||
3.47, 0.63
|
||||
3.49, 0.48
|
||||
3.49, 0.61
|
||||
3.50, 0.63
|
||||
3.51, 0.64
|
||||
3.51, 0.63
|
||||
3.54, 0.66
|
||||
3.63, 0.71
|
||||
3.74, 0.76
|
||||
3.77, 0.69
|
||||
3.77, 0.68
|
||||
3.80, 0.74
|
||||
3.83, 0.59
|
||||
3.85, 0.70
|
||||
3.87, 0.63
|
||||
3.87, 0.70
|
||||
3.90, 0.74
|
||||
3.93, 0.65
|
||||
3.93, 0.66
|
||||
3.98, 0.68
|
||||
3.98, 0.68
|
||||
4.02, 0.67
|
||||
4.13, 0.73
|
||||
4.20, 0.77
|
||||
4.20, 0.80
|
||||
4.20, 0.70
|
||||
4.20, 0.78
|
||||
4.27, 0.67
|
||||
4.35, 0.78
|
||||
4.35, 0.68
|
||||
4.39, 0.86
|
||||
4.41, 0.73
|
||||
4.48, 0.80
|
||||
4.55, 0.77
|
||||
4.75, 0.77
|
||||
4.89, 0.78
|
||||
5.34, 0.86
|
||||
5.45, 0.89
|
|
100
zad2/data2.csv
Normal file
100
zad2/data2.csv
Normal file
@ -0,0 +1,100 @@
|
||||
1.87, 4.71
|
||||
1.95, 4.55
|
||||
2.09, 4.67
|
||||
2.20, 4.46
|
||||
2.22, 4.55
|
||||
2.42, 4.51
|
||||
2.53, 4.57
|
||||
2.56, 4.55
|
||||
2.59, 4.58
|
||||
2.59, 4.56
|
||||
2.65, 4.46
|
||||
2.80, 4.38
|
||||
2.80, 4.51
|
||||
2.83, 4.48
|
||||
2.85, 4.45
|
||||
2.87, 4.45
|
||||
2.90, 4.46
|
||||
2.90, 4.47
|
||||
2.97, 4.45
|
||||
2.97, 4.43
|
||||
2.97, 4.41
|
||||
2.98, 4.37
|
||||
3.15, 4.33
|
||||
3.18, 4.33
|
||||
3.18, 4.54
|
||||
3.23, 4.42
|
||||
3.27, 4.29
|
||||
3.32, 4.39
|
||||
3.33, 4.24
|
||||
3.35, 4.40
|
||||
3.38, 4.35
|
||||
3.40, 4.29
|
||||
3.42, 4.34
|
||||
3.45, 4.35
|
||||
3.45, 4.32
|
||||
3.50, 4.28
|
||||
3.58, 4.31
|
||||
3.69, 4.33
|
||||
3.70, 4.33
|
||||
3.72, 4.28
|
||||
3.74, 4.24
|
||||
3.75, 4.23
|
||||
3.77, 4.31
|
||||
3.85, 4.28
|
||||
3.90, 4.28
|
||||
3.90, 4.21
|
||||
3.91, 4.24
|
||||
3.92, 4.35
|
||||
3.99, 4.26
|
||||
4.04, 4.24
|
||||
4.04, 4.28
|
||||
4.05, 4.18
|
||||
4.07, 4.25
|
||||
4.10, 4.31
|
||||
4.12, 4.16
|
||||
4.16, 4.24
|
||||
4.21, 4.20
|
||||
4.23, 4.30
|
||||
4.24, 4.28
|
||||
4.24, 4.28
|
||||
4.32, 4.23
|
||||
4.35, 4.22
|
||||
4.35, 4.12
|
||||
4.36, 4.26
|
||||
4.43, 4.36
|
||||
4.45, 4.33
|
||||
4.45, 4.32
|
||||
4.47, 4.37
|
||||
4.49, 4.40
|
||||
4.57, 4.39
|
||||
4.59, 4.27
|
||||
4.60, 4.32
|
||||
4.60, 4.35
|
||||
4.61, 4.32
|
||||
4.71, 4.34
|
||||
4.75, 4.33
|
||||
4.77, 4.46
|
||||
4.78, 4.33
|
||||
4.82, 4.46
|
||||
4.90, 4.56
|
||||
4.97, 4.40
|
||||
4.99, 4.50
|
||||
5.02, 4.56
|
||||
5.05, 4.58
|
||||
5.05, 4.59
|
||||
5.06, 4.55
|
||||
5.15, 4.62
|
||||
5.20, 4.59
|
||||
5.28, 4.78
|
||||
5.28, 4.66
|
||||
5.33, 4.71
|
||||
5.38, 4.81
|
||||
5.40, 4.71
|
||||
5.54, 4.85
|
||||
5.66, 4.93
|
||||
5.69, 4.97
|
||||
5.77, 5.05
|
||||
5.84, 5.15
|
||||
5.99, 5.15
|
||||
6.21, 5.43
|
|
100
zad2/data3.csv
Normal file
100
zad2/data3.csv
Normal file
@ -0,0 +1,100 @@
|
||||
2.75, 3.79, 3.21
|
||||
2.80, 1.42, 5.13
|
||||
2.01, 3.11, 3.35
|
||||
2.02, 1.32, 3.13
|
||||
3.28, 4.61, 3.55
|
||||
2.69, 3.06, 3.72
|
||||
3.15, 1.10, 5.46
|
||||
2.72, 2.61, 4.39
|
||||
3.07, 4.20, 4.81
|
||||
2.91, 3.77, 3.46
|
||||
3.61, 3.45, 5.70
|
||||
0.91, 1.42, 1.51
|
||||
2.45, 5.00, 2.90
|
||||
3.18, 2.82, 5.27
|
||||
2.65, 2.93, 3.78
|
||||
1.51, 4.66, 0.68
|
||||
1.76, 3.16, 2.60
|
||||
1.38, 4.05, 0.82
|
||||
2.85, 2.65, 4.34
|
||||
2.52, 2.99, 3.32
|
||||
3.33, 3.79, 4.59
|
||||
2.82, 3.34, 4.41
|
||||
2.83, 2.89, 3.39
|
||||
4.71, 4.31, 8.44
|
||||
2.81, 2.42, 4.32
|
||||
2.83, 2.35, 3.89
|
||||
3.51, 1.58, 6.63
|
||||
1.87, 3.49, 1.71
|
||||
3.00, 3.24, 4.66
|
||||
2.02, 3.67, 2.50
|
||||
3.57, 4.36, 5.30
|
||||
1.59, 1.37, 3.07
|
||||
4.28, 4.27, 5.95
|
||||
4.03, 4.00, 5.54
|
||||
3.78, 2.30, 5.82
|
||||
1.97, 1.27, 3.29
|
||||
2.60, 2.16, 3.96
|
||||
3.13, 3.38, 4.87
|
||||
5.93, 3.52, 10.12
|
||||
3.20, 3.66, 3.96
|
||||
3.60, 1.95, 5.27
|
||||
2.44, 4.78, 2.88
|
||||
0.82, 1.95, 0.52
|
||||
4.58, 2.19, 7.74
|
||||
1.39, 2.26, 1.33
|
||||
3.40, 1.77, 6.96
|
||||
3.26, 3.38, 4.37
|
||||
2.93, 1.09, 5.39
|
||||
3.04, 4.57, 4.09
|
||||
2.09, 3.28, 2.51
|
||||
0.80, 3.91, -0.14
|
||||
2.18, 2.72, 3.31
|
||||
3.50, 1.13, 7.11
|
||||
2.11, 2.32, 2.50
|
||||
3.73, 2.41, 6.18
|
||||
4.91, 2.87, 8.61
|
||||
3.48, 0.20, 5.67
|
||||
3.68, 4.18, 5.29
|
||||
4.13, 4.18, 5.80
|
||||
3.12, 2.72, 5.18
|
||||
3.04, 4.76, 3.37
|
||||
1.98, 3.33, 2.49
|
||||
2.46, 3.60, 4.04
|
||||
2.47, 4.05, 2.36
|
||||
2.65, 1.80, 4.45
|
||||
2.20, 3.82, 2.99
|
||||
5.53, 3.55, 8.99
|
||||
1.35, 4.95, 0.19
|
||||
3.32, 3.79, 4.96
|
||||
2.74, 3.75, 2.60
|
||||
2.42, 1.24, 4.68
|
||||
2.15, 3.61, 2.26
|
||||
3.24, 3.58, 4.78
|
||||
3.84, 2.43, 5.78
|
||||
2.68, 3.08, 3.95
|
||||
3.66, 3.86, 5.03
|
||||
2.43, 1.20, 3.97
|
||||
1.94, 1.67, 3.33
|
||||
4.55, 2.78, 7.85
|
||||
3.34, 3.50, 5.36
|
||||
1.92, 5.04, 1.82
|
||||
2.71, 2.45, 4.32
|
||||
1.63, 4.45, -0.19
|
||||
3.80, 5.57, 5.94
|
||||
3.20, 5.54, 3.52
|
||||
3.66, 2.38, 6.20
|
||||
3.80, 4.36, 5.54
|
||||
2.56, 4.35, 2.27
|
||||
2.85, 2.99, 4.21
|
||||
3.09, 1.80, 5.12
|
||||
2.96, 3.96, 3.04
|
||||
2.23, 4.21, 2.51
|
||||
1.82, 3.40, 2.91
|
||||
2.66, 1.51, 4.79
|
||||
4.48, 3.93, 7.25
|
||||
2.15, 2.77, 2.76
|
||||
1.09, 1.58, 1.85
|
||||
3.13, 4.81, 3.73
|
||||
2.24, 1.74, 2.85
|
||||
2.58, 3.51, 3.47
|
|
100
zad2/data4.csv
Normal file
100
zad2/data4.csv
Normal file
@ -0,0 +1,100 @@
|
||||
2.17, 3.71, 1.62
|
||||
2.51, 2.96, 3.37
|
||||
2.39, 4.18, -0.20
|
||||
4.57, 3.32, 6.29
|
||||
1.90, 4.69, -0.94
|
||||
2.78, 4.06, -0.22
|
||||
2.18, 4.85, -2.46
|
||||
1.78, 4.81, -1.21
|
||||
2.60, 2.18, 5.03
|
||||
2.69, 3.14, 2.95
|
||||
2.03, 3.01, 3.54
|
||||
4.78, 2.10, 12.28
|
||||
3.07, 1.70, 6.67
|
||||
4.06, 3.68, 2.84
|
||||
1.66, 2.61, 4.53
|
||||
1.94, 2.65, 4.21
|
||||
3.85, 4.62, -2.49
|
||||
3.77, 4.78, -3.54
|
||||
0.62, 1.97, 6.96
|
||||
0.82, 1.58, 6.27
|
||||
1.65, 4.04, 1.90
|
||||
2.32, 3.50, 2.03
|
||||
1.65, 4.04, 1.66
|
||||
2.81, 2.18, 5.30
|
||||
2.66, 4.62, -2.39
|
||||
4.39, 0.99, 13.59
|
||||
2.48, 3.03, 3.21
|
||||
3.31, 3.91, 0.41
|
||||
4.19, 1.92, 9.94
|
||||
2.18, 4.25, -0.15
|
||||
3.64, 4.17, -0.40
|
||||
2.65, 1.40, 6.24
|
||||
2.22, 3.28, 2.69
|
||||
4.05, 2.90, 6.07
|
||||
1.46, 3.09, 4.18
|
||||
4.41, 2.98, 7.14
|
||||
3.47, 2.59, 5.46
|
||||
2.80, 1.36, 6.68
|
||||
2.39, 3.83, 0.95
|
||||
3.14, 1.52, 7.38
|
||||
3.23, 4.85, -3.88
|
||||
0.83, 4.86, 2.76
|
||||
2.95, 2.58, 4.59
|
||||
2.07, 3.87, 1.29
|
||||
1.88, 2.55, 4.39
|
||||
4.53, 4.29, 0.77
|
||||
2.44, 2.51, 4.35
|
||||
3.65, 4.93, -4.36
|
||||
3.41, 1.23, 8.68
|
||||
1.18, 1.48, 5.63
|
||||
3.43, 3.12, 3.63
|
||||
2.54, 2.39, 4.64
|
||||
2.70, 3.50, 1.81
|
||||
4.29, 4.11, 1.11
|
||||
1.68, 2.51, 4.62
|
||||
3.02, 2.46, 4.89
|
||||
2.20, 2.57, 4.15
|
||||
3.31, 1.67, 7.41
|
||||
1.49, 3.19, 3.98
|
||||
2.82, 1.22, 6.81
|
||||
3.00, 2.41, 5.07
|
||||
2.96, 1.61, 6.62
|
||||
1.68, 3.84, 2.25
|
||||
4.49, 1.73, 11.90
|
||||
3.68, 4.92, -4.27
|
||||
3.37, 4.90, -4.15
|
||||
3.39, 1.35, 8.35
|
||||
4.82, 4.66, -0.92
|
||||
2.74, 3.23, 2.67
|
||||
2.10, 1.82, 5.19
|
||||
1.77, 1.04, 5.33
|
||||
2.41, 2.99, 3.25
|
||||
2.08, 4.33, -0.38
|
||||
0.74, 3.81, 5.32
|
||||
1.61, 1.58, 5.32
|
||||
2.69, 4.96, -4.06
|
||||
2.20, 3.24, 2.84
|
||||
2.83, 1.49, 6.54
|
||||
1.64, 4.31, 0.96
|
||||
3.51, 4.05, 0.05
|
||||
2.84, 1.27, 6.77
|
||||
4.02, 3.44, 3.99
|
||||
3.25, 4.25, -1.11
|
||||
3.72, 2.60, 6.10
|
||||
3.85, 3.95, 1.00
|
||||
4.63, 3.47, 5.56
|
||||
2.30, 4.43, -1.04
|
||||
1.44, 3.37, 3.76
|
||||
3.16, 2.71, 4.62
|
||||
1.85, 4.90, -1.60
|
||||
4.37, 3.26, 5.59
|
||||
3.55, 2.81, 5.05
|
||||
3.28, 1.32, 7.88
|
||||
3.93, 2.02, 8.84
|
||||
3.21, 2.14, 6.19
|
||||
2.67, 1.44, 6.23
|
||||
1.68, 1.16, 5.32
|
||||
3.36, 3.68, 1.56
|
||||
2.31, 1.71, 5.40
|
||||
3.87, 1.19, 10.51
|
|
BIN
zad2/regresja-liniowa (1).pdf
(Stored with Git LFS)
Normal file
BIN
zad2/regresja-liniowa (1).pdf
(Stored with Git LFS)
Normal file
Binary file not shown.
71
zad2/zad2.py
Normal file
71
zad2/zad2.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""
|
||||
Komputerowa analiza danych
|
||||
Zadanie 2
|
||||
Michał Leśniak 195642
|
||||
"""
|
||||
from statistics import mean
|
||||
|
||||
|
||||
def var(lst):
|
||||
x_mean = mean(lst)
|
||||
return sum((x-x_mean)**2 for x in lst)/len(lst)
|
||||
|
||||
|
||||
def cov(lst_x, lst_y):
|
||||
assert len(lst_x) == len(lst_y)
|
||||
x_mean = mean(lst_x)
|
||||
y_mean = mean(lst_y)
|
||||
return sum((lst_x[i]-x_mean)*(lst_y[i]-y_mean) for i in range(len(lst_x)))/len(lst_x)
|
||||
|
||||
|
||||
def load_data(*args):
|
||||
ret = ()
|
||||
for arg in args:
|
||||
with open(arg, 'r') as f:
|
||||
lines = f.read().splitlines()
|
||||
lst = []
|
||||
for line in lines:
|
||||
lst.append(tuple([float(x.strip()) for x in line.split(',')]))
|
||||
ret += lst,
|
||||
return ret
|
||||
|
||||
|
||||
def model1(data):
|
||||
lst_x = [x for x, _ in data]
|
||||
lst_y = [y for _, y in data]
|
||||
|
||||
a = cov(lst_x, lst_y)/var(lst_x)
|
||||
print(f'f(X) = {a} * X')
|
||||
|
||||
|
||||
def model2(data):
|
||||
lst_x = [x for x, _ in data]
|
||||
lst_y = [y for _, y in data]
|
||||
|
||||
a = cov(lst_x, lst_y)/var(lst_x)
|
||||
b = mean(lst_y) - a*mean(lst_x)
|
||||
print(f'f(X) = {a} * X + {b}')
|
||||
|
||||
|
||||
def main():
|
||||
data1, data2, data3, data4 = load_data(
|
||||
'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')
|
||||
print(var([x for x, _ in data1]))
|
||||
print(cov([x for x, _ in data1], [y for _, y in data1]))
|
||||
# print(data2)
|
||||
# print(data3)
|
||||
# print(data4)
|
||||
model1(data1)
|
||||
model1(data2)
|
||||
model2(data1)
|
||||
model2(data2)
|
||||
x_mean = mean([x for x, _ in data1])
|
||||
y_mean = mean([y for _, y in data1])
|
||||
xy = sum([x*y for x, y in data1])
|
||||
x_2 = sum([x**2 for x, _ in data1])
|
||||
print(sum([2*x-2*x*y for x,y in data1])/len(data1))
|
||||
print(xy/x_2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue
Block a user