Zad 2 wip

This commit is contained in:
Michał Leśniak 2021-12-05 19:59:44 +01:00
parent 3149b56948
commit 2efbd423ad
8 changed files with 664 additions and 0 deletions

31
zad2/README.md Normal file
View File

@ -0,0 +1,31 @@
# Zadanie 2: Instrukcja
Dla podanych zestawów danych i proponowanych modeli oszacuj wartości parametrów a..f, które minimalizują średni błąd kwadratowy modelowanej funkcji w podanych punktach.
Dla każdego modelu przedstaw:
- sposób przygotowania danych do zastosowania prostej regresji liniowej
- wyznaczone wartości parametrów
- wykres przedstawiający modelowaną funkcję na tle danych punktów
- średni błąd kwadratowy dotyczący wartości funkcji w danych punktach
- największą wartość odchylenia wartości funkcji od danych punktów
- wartość współczynnika R**2
- histogram odchyleń wartości funkcji od danych
- (*) test hipotezy statystycznej, że błędy mają rozkład normalny (test chi-kwadrat Pearsona lub test Shapiro-Wilka)
- komentarz na temat przydatności zastosowania rozważanego modelu
Dla zestawów danych: dane1.csv, dane2.csv (dwie kolumny: X, wartość)
Należy rozważyć modele:
```
f(X) = a * X
f(X) = a * X + b
f(X) = a * X**2 + b * sin(X) + c
```
Dla zestawów danych: dane3.csv, dane4.csv (trzy kolumny: X1, X2, wartość)
Należy rozważyć modele:
```
f(X1, X2) = a * X1 + b * X2 + c
f(X1, X2) = a * X1**2 + b * X1*X2 + c * X2**2 + d * X1 + e * X2 + f
```
Ostatnia modyfikacja: czwartek, 5 listopada 2020, 14:05

159
zad2/chi2_normality.py Normal file
View File

@ -0,0 +1,159 @@
#!/usr/bin/env python3
import math
import random
# for stats.chi2.cdf (cumulative chi squared distribution)
from scipy import stats
# X - list, b - begin (first index), e - end (too big index),
# k - index of the element to set in the right place
def quickselect(X, b, e, k):
# end condition
if e - b <= 1: return
# random pivot
r = random.randint(b, e - 1)
X[b], X[r] = X[r], X[b]
# partition
p0 = b
p1 = b + 1
while p1 < e:
if X[p1] < X[p0]:
if p1 == p0 + 1:
X[p0], X[p1] = X[p1], X[p0]
p0 = p1
else:
X[p0], X[p0+1], X[p1] = X[p1], X[p0], X[p0+1]
p0 += 1
p1 += 1
# recursion
if k < p0:
quickselect(X, b, p0, k)
elif k > p0:
quickselect(X, p0+1, e, k)
# quantile calculator (no libraries needed!)
def quantile(X, q, copy=True):
# edge cases and exceeded range of q
if q >= 1.0:
return max(X)
elif q <= 0.0:
return min(X)
n = len(X)
pos = (n - 1) * q # target position
left = int(pos) # index of the left element
rcoeff = pos - left # coefficient for the right element
# optional copy
if copy:
X = X[:]
quickselect(X, 0, n, left) # quickselect!
# X[left] is in order now, and elements on the right are greater
xleft = X[left] # left element
if rcoeff == 0.0: # the simple case
return xleft
else: # the general case
xright = min(X[i] for i in range(left+1, n)) # right element
return xleft * (1.0 - rcoeff) + xright * rcoeff
# interquantile range
def IQR(X, copy=True):
if copy:
X = X[:]
q1 = quantile(X, 0.25, copy=False)
q3 = quantile(X, 0.75, copy=False)
return q3 - q1
# cumulative distribution function of normal distribution N(mu, sigma)
def normal_cdf(mu, sigma, x):
return 0.5 * (1.0 + math.erf((x - mu) / (2.0**0.5 * sigma)))
# Chi-squared normality test
def chi2normality(X, var_df=1, bins=None):
n = len(X)
assert n >= 4
mu = sum(X) / n # mean
evar = sum((x - mu)**2 for x in X) / (n - var_df) # estimated variance
sigma = evar**0.5 # the expected distribution will be N(mu, sigma)
minx = min(X)
gap = max(X) - minx
assert gap != 0.0
if bins is None: # guess the number of bins
# see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
h = 2.0 * IQR(X) / (n**(1.0/3.0)) # bin size by the FreedmanDiaconis rule
k = int(math.ceil(gap / h)) # number of histogram bins
else:
k = bins
k = min(4, k) # k should be at least 4
h = gap / k # actual bin size
# section points between bins
points = [minx + h * i for i in range(k + 1)]
points[0] = -math.inf
points[-1] = math.inf
# actual frequency
freq = [0] * k
for x in X:
freq[min(int((x - minx) / h), k-1)] += 1
# cumulative distributions for the section points
cdfs = [normal_cdf(mu, sigma, p) for p in points]
# expected frequencies
expected = [(cdfs[i+1] - cdfs[i]) * n for i in range(k)]
# Chi-squared statistic
chi2stat = sum((freq[i] - expected[i])**2 / expected[i] for i in range(k))
# Chi-squared degrees of freedom
# i.e. number of bins - 1 - number of parameters of of distribution
# which is number of bins - 3, as normal distribution has two parameters
chi2df = k - 3
# p-value for the "X is sampled from a normal distribution" hypothesis
# note: for very big chi2stat values, it is close to 0
pvalue = 1.0 - stats.chi2.cdf(chi2stat, chi2df)
# significance (alpha) is a probability, that the hypothesis will be rejected
# despite of being actually true
# when p-value is lower than alpha, the hypothesis is rejected
# otherwise, the test fails to reject the hypothesis (this usually happens
# when the hypothesis is true)
return pvalue
# perform the test, print some messages
def chi2normality_describe(X, alpha=0.05):
print('Hypothesis: X is sampled from a normal distribution')
pvalue = chi2normality(X)
print('Significance level:', alpha)
print('p-value: %.7f' % pvalue)
if pvalue < alpha:
print('Hypothesis rejected. X doesn\'t seem to be sampled from a normal distribution.')
else:
print('Failed to reject the hypothesis.')
print()
if __name__ == '__main__':
# test for some uniform distribution
X = [random.random() * 2.0 + 3.0 for i in range(200)]
chi2normality_describe(X)
# test for actual normal distribution
X = [random.normalvariate(4.0, 5.0) for i in range(200)]
chi2normality_describe(X)

100
zad2/data1.csv Normal file
View File

@ -0,0 +1,100 @@
0.60, 0.06
0.90, 0.21
0.93, 0.21
1.06, 0.24
1.30, 0.29
1.52, 0.27
1.58, 0.16
1.69, 0.34
1.75, 0.37
1.83, 0.35
1.94, 0.37
2.00, 0.36
2.03, 0.42
2.18, 0.46
2.22, 0.32
2.22, 0.45
2.22, 0.47
2.22, 0.46
2.27, 0.45
2.28, 0.41
2.29, 0.40
2.31, 0.40
2.35, 0.42
2.38, 0.46
2.39, 0.45
2.43, 0.51
2.53, 0.49
2.55, 0.52
2.57, 0.50
2.60, 0.55
2.60, 0.54
2.61, 0.46
2.62, 0.46
2.62, 0.49
2.62, 0.51
2.62, 0.54
2.67, 0.62
2.68, 0.51
2.72, 0.53
2.73, 0.46
2.74, 0.60
2.74, 0.48
2.75, 0.50
2.76, 0.59
2.79, 0.47
2.83, 0.59
2.84, 0.45
2.84, 0.51
2.92, 0.50
2.94, 0.54
2.96, 0.47
2.96, 0.59
3.05, 0.48
3.08, 0.50
3.08, 0.61
3.33, 0.61
3.33, 0.60
3.33, 0.67
3.36, 0.63
3.36, 0.62
3.37, 0.60
3.42, 0.57
3.47, 0.63
3.49, 0.48
3.49, 0.61
3.50, 0.63
3.51, 0.64
3.51, 0.63
3.54, 0.66
3.63, 0.71
3.74, 0.76
3.77, 0.69
3.77, 0.68
3.80, 0.74
3.83, 0.59
3.85, 0.70
3.87, 0.63
3.87, 0.70
3.90, 0.74
3.93, 0.65
3.93, 0.66
3.98, 0.68
3.98, 0.68
4.02, 0.67
4.13, 0.73
4.20, 0.77
4.20, 0.80
4.20, 0.70
4.20, 0.78
4.27, 0.67
4.35, 0.78
4.35, 0.68
4.39, 0.86
4.41, 0.73
4.48, 0.80
4.55, 0.77
4.75, 0.77
4.89, 0.78
5.34, 0.86
5.45, 0.89
1 0.60 0.06
2 0.90 0.21
3 0.93 0.21
4 1.06 0.24
5 1.30 0.29
6 1.52 0.27
7 1.58 0.16
8 1.69 0.34
9 1.75 0.37
10 1.83 0.35
11 1.94 0.37
12 2.00 0.36
13 2.03 0.42
14 2.18 0.46
15 2.22 0.32
16 2.22 0.45
17 2.22 0.47
18 2.22 0.46
19 2.27 0.45
20 2.28 0.41
21 2.29 0.40
22 2.31 0.40
23 2.35 0.42
24 2.38 0.46
25 2.39 0.45
26 2.43 0.51
27 2.53 0.49
28 2.55 0.52
29 2.57 0.50
30 2.60 0.55
31 2.60 0.54
32 2.61 0.46
33 2.62 0.46
34 2.62 0.49
35 2.62 0.51
36 2.62 0.54
37 2.67 0.62
38 2.68 0.51
39 2.72 0.53
40 2.73 0.46
41 2.74 0.60
42 2.74 0.48
43 2.75 0.50
44 2.76 0.59
45 2.79 0.47
46 2.83 0.59
47 2.84 0.45
48 2.84 0.51
49 2.92 0.50
50 2.94 0.54
51 2.96 0.47
52 2.96 0.59
53 3.05 0.48
54 3.08 0.50
55 3.08 0.61
56 3.33 0.61
57 3.33 0.60
58 3.33 0.67
59 3.36 0.63
60 3.36 0.62
61 3.37 0.60
62 3.42 0.57
63 3.47 0.63
64 3.49 0.48
65 3.49 0.61
66 3.50 0.63
67 3.51 0.64
68 3.51 0.63
69 3.54 0.66
70 3.63 0.71
71 3.74 0.76
72 3.77 0.69
73 3.77 0.68
74 3.80 0.74
75 3.83 0.59
76 3.85 0.70
77 3.87 0.63
78 3.87 0.70
79 3.90 0.74
80 3.93 0.65
81 3.93 0.66
82 3.98 0.68
83 3.98 0.68
84 4.02 0.67
85 4.13 0.73
86 4.20 0.77
87 4.20 0.80
88 4.20 0.70
89 4.20 0.78
90 4.27 0.67
91 4.35 0.78
92 4.35 0.68
93 4.39 0.86
94 4.41 0.73
95 4.48 0.80
96 4.55 0.77
97 4.75 0.77
98 4.89 0.78
99 5.34 0.86
100 5.45 0.89

100
zad2/data2.csv Normal file
View File

@ -0,0 +1,100 @@
1.87, 4.71
1.95, 4.55
2.09, 4.67
2.20, 4.46
2.22, 4.55
2.42, 4.51
2.53, 4.57
2.56, 4.55
2.59, 4.58
2.59, 4.56
2.65, 4.46
2.80, 4.38
2.80, 4.51
2.83, 4.48
2.85, 4.45
2.87, 4.45
2.90, 4.46
2.90, 4.47
2.97, 4.45
2.97, 4.43
2.97, 4.41
2.98, 4.37
3.15, 4.33
3.18, 4.33
3.18, 4.54
3.23, 4.42
3.27, 4.29
3.32, 4.39
3.33, 4.24
3.35, 4.40
3.38, 4.35
3.40, 4.29
3.42, 4.34
3.45, 4.35
3.45, 4.32
3.50, 4.28
3.58, 4.31
3.69, 4.33
3.70, 4.33
3.72, 4.28
3.74, 4.24
3.75, 4.23
3.77, 4.31
3.85, 4.28
3.90, 4.28
3.90, 4.21
3.91, 4.24
3.92, 4.35
3.99, 4.26
4.04, 4.24
4.04, 4.28
4.05, 4.18
4.07, 4.25
4.10, 4.31
4.12, 4.16
4.16, 4.24
4.21, 4.20
4.23, 4.30
4.24, 4.28
4.24, 4.28
4.32, 4.23
4.35, 4.22
4.35, 4.12
4.36, 4.26
4.43, 4.36
4.45, 4.33
4.45, 4.32
4.47, 4.37
4.49, 4.40
4.57, 4.39
4.59, 4.27
4.60, 4.32
4.60, 4.35
4.61, 4.32
4.71, 4.34
4.75, 4.33
4.77, 4.46
4.78, 4.33
4.82, 4.46
4.90, 4.56
4.97, 4.40
4.99, 4.50
5.02, 4.56
5.05, 4.58
5.05, 4.59
5.06, 4.55
5.15, 4.62
5.20, 4.59
5.28, 4.78
5.28, 4.66
5.33, 4.71
5.38, 4.81
5.40, 4.71
5.54, 4.85
5.66, 4.93
5.69, 4.97
5.77, 5.05
5.84, 5.15
5.99, 5.15
6.21, 5.43
1 1.87 4.71
2 1.95 4.55
3 2.09 4.67
4 2.20 4.46
5 2.22 4.55
6 2.42 4.51
7 2.53 4.57
8 2.56 4.55
9 2.59 4.58
10 2.59 4.56
11 2.65 4.46
12 2.80 4.38
13 2.80 4.51
14 2.83 4.48
15 2.85 4.45
16 2.87 4.45
17 2.90 4.46
18 2.90 4.47
19 2.97 4.45
20 2.97 4.43
21 2.97 4.41
22 2.98 4.37
23 3.15 4.33
24 3.18 4.33
25 3.18 4.54
26 3.23 4.42
27 3.27 4.29
28 3.32 4.39
29 3.33 4.24
30 3.35 4.40
31 3.38 4.35
32 3.40 4.29
33 3.42 4.34
34 3.45 4.35
35 3.45 4.32
36 3.50 4.28
37 3.58 4.31
38 3.69 4.33
39 3.70 4.33
40 3.72 4.28
41 3.74 4.24
42 3.75 4.23
43 3.77 4.31
44 3.85 4.28
45 3.90 4.28
46 3.90 4.21
47 3.91 4.24
48 3.92 4.35
49 3.99 4.26
50 4.04 4.24
51 4.04 4.28
52 4.05 4.18
53 4.07 4.25
54 4.10 4.31
55 4.12 4.16
56 4.16 4.24
57 4.21 4.20
58 4.23 4.30
59 4.24 4.28
60 4.24 4.28
61 4.32 4.23
62 4.35 4.22
63 4.35 4.12
64 4.36 4.26
65 4.43 4.36
66 4.45 4.33
67 4.45 4.32
68 4.47 4.37
69 4.49 4.40
70 4.57 4.39
71 4.59 4.27
72 4.60 4.32
73 4.60 4.35
74 4.61 4.32
75 4.71 4.34
76 4.75 4.33
77 4.77 4.46
78 4.78 4.33
79 4.82 4.46
80 4.90 4.56
81 4.97 4.40
82 4.99 4.50
83 5.02 4.56
84 5.05 4.58
85 5.05 4.59
86 5.06 4.55
87 5.15 4.62
88 5.20 4.59
89 5.28 4.78
90 5.28 4.66
91 5.33 4.71
92 5.38 4.81
93 5.40 4.71
94 5.54 4.85
95 5.66 4.93
96 5.69 4.97
97 5.77 5.05
98 5.84 5.15
99 5.99 5.15
100 6.21 5.43

100
zad2/data3.csv Normal file
View File

@ -0,0 +1,100 @@
2.75, 3.79, 3.21
2.80, 1.42, 5.13
2.01, 3.11, 3.35
2.02, 1.32, 3.13
3.28, 4.61, 3.55
2.69, 3.06, 3.72
3.15, 1.10, 5.46
2.72, 2.61, 4.39
3.07, 4.20, 4.81
2.91, 3.77, 3.46
3.61, 3.45, 5.70
0.91, 1.42, 1.51
2.45, 5.00, 2.90
3.18, 2.82, 5.27
2.65, 2.93, 3.78
1.51, 4.66, 0.68
1.76, 3.16, 2.60
1.38, 4.05, 0.82
2.85, 2.65, 4.34
2.52, 2.99, 3.32
3.33, 3.79, 4.59
2.82, 3.34, 4.41
2.83, 2.89, 3.39
4.71, 4.31, 8.44
2.81, 2.42, 4.32
2.83, 2.35, 3.89
3.51, 1.58, 6.63
1.87, 3.49, 1.71
3.00, 3.24, 4.66
2.02, 3.67, 2.50
3.57, 4.36, 5.30
1.59, 1.37, 3.07
4.28, 4.27, 5.95
4.03, 4.00, 5.54
3.78, 2.30, 5.82
1.97, 1.27, 3.29
2.60, 2.16, 3.96
3.13, 3.38, 4.87
5.93, 3.52, 10.12
3.20, 3.66, 3.96
3.60, 1.95, 5.27
2.44, 4.78, 2.88
0.82, 1.95, 0.52
4.58, 2.19, 7.74
1.39, 2.26, 1.33
3.40, 1.77, 6.96
3.26, 3.38, 4.37
2.93, 1.09, 5.39
3.04, 4.57, 4.09
2.09, 3.28, 2.51
0.80, 3.91, -0.14
2.18, 2.72, 3.31
3.50, 1.13, 7.11
2.11, 2.32, 2.50
3.73, 2.41, 6.18
4.91, 2.87, 8.61
3.48, 0.20, 5.67
3.68, 4.18, 5.29
4.13, 4.18, 5.80
3.12, 2.72, 5.18
3.04, 4.76, 3.37
1.98, 3.33, 2.49
2.46, 3.60, 4.04
2.47, 4.05, 2.36
2.65, 1.80, 4.45
2.20, 3.82, 2.99
5.53, 3.55, 8.99
1.35, 4.95, 0.19
3.32, 3.79, 4.96
2.74, 3.75, 2.60
2.42, 1.24, 4.68
2.15, 3.61, 2.26
3.24, 3.58, 4.78
3.84, 2.43, 5.78
2.68, 3.08, 3.95
3.66, 3.86, 5.03
2.43, 1.20, 3.97
1.94, 1.67, 3.33
4.55, 2.78, 7.85
3.34, 3.50, 5.36
1.92, 5.04, 1.82
2.71, 2.45, 4.32
1.63, 4.45, -0.19
3.80, 5.57, 5.94
3.20, 5.54, 3.52
3.66, 2.38, 6.20
3.80, 4.36, 5.54
2.56, 4.35, 2.27
2.85, 2.99, 4.21
3.09, 1.80, 5.12
2.96, 3.96, 3.04
2.23, 4.21, 2.51
1.82, 3.40, 2.91
2.66, 1.51, 4.79
4.48, 3.93, 7.25
2.15, 2.77, 2.76
1.09, 1.58, 1.85
3.13, 4.81, 3.73
2.24, 1.74, 2.85
2.58, 3.51, 3.47
1 2.75 3.79 3.21
2 2.80 1.42 5.13
3 2.01 3.11 3.35
4 2.02 1.32 3.13
5 3.28 4.61 3.55
6 2.69 3.06 3.72
7 3.15 1.10 5.46
8 2.72 2.61 4.39
9 3.07 4.20 4.81
10 2.91 3.77 3.46
11 3.61 3.45 5.70
12 0.91 1.42 1.51
13 2.45 5.00 2.90
14 3.18 2.82 5.27
15 2.65 2.93 3.78
16 1.51 4.66 0.68
17 1.76 3.16 2.60
18 1.38 4.05 0.82
19 2.85 2.65 4.34
20 2.52 2.99 3.32
21 3.33 3.79 4.59
22 2.82 3.34 4.41
23 2.83 2.89 3.39
24 4.71 4.31 8.44
25 2.81 2.42 4.32
26 2.83 2.35 3.89
27 3.51 1.58 6.63
28 1.87 3.49 1.71
29 3.00 3.24 4.66
30 2.02 3.67 2.50
31 3.57 4.36 5.30
32 1.59 1.37 3.07
33 4.28 4.27 5.95
34 4.03 4.00 5.54
35 3.78 2.30 5.82
36 1.97 1.27 3.29
37 2.60 2.16 3.96
38 3.13 3.38 4.87
39 5.93 3.52 10.12
40 3.20 3.66 3.96
41 3.60 1.95 5.27
42 2.44 4.78 2.88
43 0.82 1.95 0.52
44 4.58 2.19 7.74
45 1.39 2.26 1.33
46 3.40 1.77 6.96
47 3.26 3.38 4.37
48 2.93 1.09 5.39
49 3.04 4.57 4.09
50 2.09 3.28 2.51
51 0.80 3.91 -0.14
52 2.18 2.72 3.31
53 3.50 1.13 7.11
54 2.11 2.32 2.50
55 3.73 2.41 6.18
56 4.91 2.87 8.61
57 3.48 0.20 5.67
58 3.68 4.18 5.29
59 4.13 4.18 5.80
60 3.12 2.72 5.18
61 3.04 4.76 3.37
62 1.98 3.33 2.49
63 2.46 3.60 4.04
64 2.47 4.05 2.36
65 2.65 1.80 4.45
66 2.20 3.82 2.99
67 5.53 3.55 8.99
68 1.35 4.95 0.19
69 3.32 3.79 4.96
70 2.74 3.75 2.60
71 2.42 1.24 4.68
72 2.15 3.61 2.26
73 3.24 3.58 4.78
74 3.84 2.43 5.78
75 2.68 3.08 3.95
76 3.66 3.86 5.03
77 2.43 1.20 3.97
78 1.94 1.67 3.33
79 4.55 2.78 7.85
80 3.34 3.50 5.36
81 1.92 5.04 1.82
82 2.71 2.45 4.32
83 1.63 4.45 -0.19
84 3.80 5.57 5.94
85 3.20 5.54 3.52
86 3.66 2.38 6.20
87 3.80 4.36 5.54
88 2.56 4.35 2.27
89 2.85 2.99 4.21
90 3.09 1.80 5.12
91 2.96 3.96 3.04
92 2.23 4.21 2.51
93 1.82 3.40 2.91
94 2.66 1.51 4.79
95 4.48 3.93 7.25
96 2.15 2.77 2.76
97 1.09 1.58 1.85
98 3.13 4.81 3.73
99 2.24 1.74 2.85
100 2.58 3.51 3.47

100
zad2/data4.csv Normal file
View File

@ -0,0 +1,100 @@
2.17, 3.71, 1.62
2.51, 2.96, 3.37
2.39, 4.18, -0.20
4.57, 3.32, 6.29
1.90, 4.69, -0.94
2.78, 4.06, -0.22
2.18, 4.85, -2.46
1.78, 4.81, -1.21
2.60, 2.18, 5.03
2.69, 3.14, 2.95
2.03, 3.01, 3.54
4.78, 2.10, 12.28
3.07, 1.70, 6.67
4.06, 3.68, 2.84
1.66, 2.61, 4.53
1.94, 2.65, 4.21
3.85, 4.62, -2.49
3.77, 4.78, -3.54
0.62, 1.97, 6.96
0.82, 1.58, 6.27
1.65, 4.04, 1.90
2.32, 3.50, 2.03
1.65, 4.04, 1.66
2.81, 2.18, 5.30
2.66, 4.62, -2.39
4.39, 0.99, 13.59
2.48, 3.03, 3.21
3.31, 3.91, 0.41
4.19, 1.92, 9.94
2.18, 4.25, -0.15
3.64, 4.17, -0.40
2.65, 1.40, 6.24
2.22, 3.28, 2.69
4.05, 2.90, 6.07
1.46, 3.09, 4.18
4.41, 2.98, 7.14
3.47, 2.59, 5.46
2.80, 1.36, 6.68
2.39, 3.83, 0.95
3.14, 1.52, 7.38
3.23, 4.85, -3.88
0.83, 4.86, 2.76
2.95, 2.58, 4.59
2.07, 3.87, 1.29
1.88, 2.55, 4.39
4.53, 4.29, 0.77
2.44, 2.51, 4.35
3.65, 4.93, -4.36
3.41, 1.23, 8.68
1.18, 1.48, 5.63
3.43, 3.12, 3.63
2.54, 2.39, 4.64
2.70, 3.50, 1.81
4.29, 4.11, 1.11
1.68, 2.51, 4.62
3.02, 2.46, 4.89
2.20, 2.57, 4.15
3.31, 1.67, 7.41
1.49, 3.19, 3.98
2.82, 1.22, 6.81
3.00, 2.41, 5.07
2.96, 1.61, 6.62
1.68, 3.84, 2.25
4.49, 1.73, 11.90
3.68, 4.92, -4.27
3.37, 4.90, -4.15
3.39, 1.35, 8.35
4.82, 4.66, -0.92
2.74, 3.23, 2.67
2.10, 1.82, 5.19
1.77, 1.04, 5.33
2.41, 2.99, 3.25
2.08, 4.33, -0.38
0.74, 3.81, 5.32
1.61, 1.58, 5.32
2.69, 4.96, -4.06
2.20, 3.24, 2.84
2.83, 1.49, 6.54
1.64, 4.31, 0.96
3.51, 4.05, 0.05
2.84, 1.27, 6.77
4.02, 3.44, 3.99
3.25, 4.25, -1.11
3.72, 2.60, 6.10
3.85, 3.95, 1.00
4.63, 3.47, 5.56
2.30, 4.43, -1.04
1.44, 3.37, 3.76
3.16, 2.71, 4.62
1.85, 4.90, -1.60
4.37, 3.26, 5.59
3.55, 2.81, 5.05
3.28, 1.32, 7.88
3.93, 2.02, 8.84
3.21, 2.14, 6.19
2.67, 1.44, 6.23
1.68, 1.16, 5.32
3.36, 3.68, 1.56
2.31, 1.71, 5.40
3.87, 1.19, 10.51
1 2.17 3.71 1.62
2 2.51 2.96 3.37
3 2.39 4.18 -0.20
4 4.57 3.32 6.29
5 1.90 4.69 -0.94
6 2.78 4.06 -0.22
7 2.18 4.85 -2.46
8 1.78 4.81 -1.21
9 2.60 2.18 5.03
10 2.69 3.14 2.95
11 2.03 3.01 3.54
12 4.78 2.10 12.28
13 3.07 1.70 6.67
14 4.06 3.68 2.84
15 1.66 2.61 4.53
16 1.94 2.65 4.21
17 3.85 4.62 -2.49
18 3.77 4.78 -3.54
19 0.62 1.97 6.96
20 0.82 1.58 6.27
21 1.65 4.04 1.90
22 2.32 3.50 2.03
23 1.65 4.04 1.66
24 2.81 2.18 5.30
25 2.66 4.62 -2.39
26 4.39 0.99 13.59
27 2.48 3.03 3.21
28 3.31 3.91 0.41
29 4.19 1.92 9.94
30 2.18 4.25 -0.15
31 3.64 4.17 -0.40
32 2.65 1.40 6.24
33 2.22 3.28 2.69
34 4.05 2.90 6.07
35 1.46 3.09 4.18
36 4.41 2.98 7.14
37 3.47 2.59 5.46
38 2.80 1.36 6.68
39 2.39 3.83 0.95
40 3.14 1.52 7.38
41 3.23 4.85 -3.88
42 0.83 4.86 2.76
43 2.95 2.58 4.59
44 2.07 3.87 1.29
45 1.88 2.55 4.39
46 4.53 4.29 0.77
47 2.44 2.51 4.35
48 3.65 4.93 -4.36
49 3.41 1.23 8.68
50 1.18 1.48 5.63
51 3.43 3.12 3.63
52 2.54 2.39 4.64
53 2.70 3.50 1.81
54 4.29 4.11 1.11
55 1.68 2.51 4.62
56 3.02 2.46 4.89
57 2.20 2.57 4.15
58 3.31 1.67 7.41
59 1.49 3.19 3.98
60 2.82 1.22 6.81
61 3.00 2.41 5.07
62 2.96 1.61 6.62
63 1.68 3.84 2.25
64 4.49 1.73 11.90
65 3.68 4.92 -4.27
66 3.37 4.90 -4.15
67 3.39 1.35 8.35
68 4.82 4.66 -0.92
69 2.74 3.23 2.67
70 2.10 1.82 5.19
71 1.77 1.04 5.33
72 2.41 2.99 3.25
73 2.08 4.33 -0.38
74 0.74 3.81 5.32
75 1.61 1.58 5.32
76 2.69 4.96 -4.06
77 2.20 3.24 2.84
78 2.83 1.49 6.54
79 1.64 4.31 0.96
80 3.51 4.05 0.05
81 2.84 1.27 6.77
82 4.02 3.44 3.99
83 3.25 4.25 -1.11
84 3.72 2.60 6.10
85 3.85 3.95 1.00
86 4.63 3.47 5.56
87 2.30 4.43 -1.04
88 1.44 3.37 3.76
89 3.16 2.71 4.62
90 1.85 4.90 -1.60
91 4.37 3.26 5.59
92 3.55 2.81 5.05
93 3.28 1.32 7.88
94 3.93 2.02 8.84
95 3.21 2.14 6.19
96 2.67 1.44 6.23
97 1.68 1.16 5.32
98 3.36 3.68 1.56
99 2.31 1.71 5.40
100 3.87 1.19 10.51

BIN
zad2/regresja-liniowa (1).pdf (Stored with Git LFS) Normal file

Binary file not shown.

71
zad2/zad2.py Normal file
View File

@ -0,0 +1,71 @@
"""
Komputerowa analiza danych
Zadanie 2
Michał Leśniak 195642
"""
from statistics import mean
def var(lst):
x_mean = mean(lst)
return sum((x-x_mean)**2 for x in lst)/len(lst)
def cov(lst_x, lst_y):
assert len(lst_x) == len(lst_y)
x_mean = mean(lst_x)
y_mean = mean(lst_y)
return sum((lst_x[i]-x_mean)*(lst_y[i]-y_mean) for i in range(len(lst_x)))/len(lst_x)
def load_data(*args):
ret = ()
for arg in args:
with open(arg, 'r') as f:
lines = f.read().splitlines()
lst = []
for line in lines:
lst.append(tuple([float(x.strip()) for x in line.split(',')]))
ret += lst,
return ret
def model1(data):
lst_x = [x for x, _ in data]
lst_y = [y for _, y in data]
a = cov(lst_x, lst_y)/var(lst_x)
print(f'f(X) = {a} * X')
def model2(data):
lst_x = [x for x, _ in data]
lst_y = [y for _, y in data]
a = cov(lst_x, lst_y)/var(lst_x)
b = mean(lst_y) - a*mean(lst_x)
print(f'f(X) = {a} * X + {b}')
def main():
data1, data2, data3, data4 = load_data(
'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')
print(var([x for x, _ in data1]))
print(cov([x for x, _ in data1], [y for _, y in data1]))
# print(data2)
# print(data3)
# print(data4)
model1(data1)
model1(data2)
model2(data1)
model2(data2)
x_mean = mean([x for x, _ in data1])
y_mean = mean([y for _, y in data1])
xy = sum([x*y for x, y in data1])
x_2 = sum([x**2 for x, _ in data1])
print(sum([2*x-2*x*y for x,y in data1])/len(data1))
print(xy/x_2)
if __name__ == '__main__':
main()