72 lines
1.6 KiB
Python
72 lines
1.6 KiB
Python
|
"""
Computer data analysis
Assignment 2
Michał Leśniak 195642
"""
|
||
|
from statistics import mean
|
||
|
|
||
|
|
||
|
def var(lst):
    """Return the population variance of the values in ``lst``.

    The squared deviations from the arithmetic mean are summed and divided
    by the number of values (``n``, not ``n - 1``).
    """
    centre = mean(lst)
    squared_deviations = [(value - centre) ** 2 for value in lst]
    return sum(squared_deviations) / len(lst)
|
||
|
|
||
|
|
||
|
def cov(lst_x, lst_y):
    """Return the population covariance of two equally long sequences.

    The products of the paired deviations from each sequence's mean are
    summed and divided by the number of pairs (``n``, not ``n - 1``).

    Args:
        lst_x: sequence of numbers (first variable).
        lst_y: sequence of numbers (second variable), same length as lst_x.

    Returns:
        The covariance of the paired values.

    Raises:
        ValueError: if the two sequences differ in length.
    """
    # Explicit check rather than ``assert``: assertions are removed when
    # Python runs with -O, which would let mismatched inputs through.
    if len(lst_x) != len(lst_y):
        raise ValueError(
            f'sequences must have equal length, '
            f'got {len(lst_x)} and {len(lst_y)}')
    x_mean = mean(lst_x)
    y_mean = mean(lst_y)
    products = ((x - x_mean) * (y - y_mean) for x, y in zip(lst_x, lst_y))
    return sum(products) / len(lst_x)
|
||
|
|
||
|
|
||
|
def load_data(*args):
    """Read comma-separated numeric text files into lists of float tuples.

    Args:
        *args: paths of the files to read; each non-blank line holds one
            record of comma-separated numbers.

    Returns:
        A tuple with one list per path, in argument order. Each list holds
        one tuple of floats per non-blank line of the file.

    Raises:
        OSError: if a file cannot be opened.
        ValueError: if a field cannot be converted to ``float``.
    """
    datasets = []
    for path in args:
        with open(path, 'r') as f:
            lines = f.read().splitlines()
        rows = [
            tuple(float(field.strip()) for field in line.split(','))
            for line in lines
            # Blank lines (e.g. extra trailing newlines) carry no record and
            # would otherwise fail in float('').
            if line.strip()
        ]
        datasets.append(rows)
    return tuple(datasets)
|
||
|
|
||
|
|
||
|
def model1(data):
    """Print a linear model without an intercept term fitted to ``data``.

    ``data`` is iterated as ``(x, y)`` pairs. The coefficient is the
    covariance of x and y divided by the variance of x; the formula is
    printed and nothing is returned.
    """
    xs = [px for px, _ in data]
    ys = [py for _, py in data]

    # NOTE(review): cov/var is the least-squares slope of a fit that also has
    # an intercept; a line forced through the origin would use
    # sum(x*y)/sum(x*x) instead - confirm which one the assignment wants.
    a = cov(xs, ys) / var(xs)
    print(f'f(X) = {a} * X')
|
||
|
|
||
|
|
||
|
def model2(data):
    """Print a linear model with slope and intercept fitted to ``data``.

    ``data`` is iterated as ``(x, y)`` pairs. The slope is the covariance
    of x and y divided by the variance of x, and the intercept is
    ``mean(y) - slope * mean(x)``. The formula is printed and nothing is
    returned.
    """
    xs = [px for px, _ in data]
    ys = [py for _, py in data]

    a = cov(xs, ys) / var(xs)
    y_bar = mean(ys)
    b = y_bar - a * mean(xs)
    print(f'f(X) = {a} * X + {b}')
|
||
|
|
||
|
|
||
|
def main():
    """Load the data sets and print statistics and fitted models.

    Reads ``data1.csv`` .. ``data4.csv`` from the current directory, prints
    the variance of x and the covariance of x and y for the first data set,
    prints both models for the first two data sets, and finally prints two
    extra quantities computed from the first data set.
    """
    # data3 and data4 are still loaded, so a missing file fails exactly as
    # before, but they are not analysed here.
    data1, data2, _data3, _data4 = load_data(
        'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')

    xs1 = [x for x, _ in data1]
    ys1 = [y for _, y in data1]
    print(var(xs1))
    print(cov(xs1, ys1))

    model1(data1)
    model1(data2)
    model2(data1)
    model2(data2)

    xy = sum(x * y for x, y in data1)
    x_2 = sum(x ** 2 for x, _ in data1)
    # Mean of 2*x - 2*x*y over the first data set.
    print(sum(2 * x - 2 * x * y for x, y in data1) / len(data1))
    # Ratio sum(x*y) / sum(x^2); presumably the slope of a least-squares line
    # through the origin - confirm against the assignment.
    print(xy / x_2)
|
||
|
|
||
|
|
||
|
# Run the analysis only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|