KAD/zad2/zad2.py

72 lines
1.6 KiB
Python
Raw Normal View History

2021-12-05 19:59:44 +01:00
"""
Komputerowa analiza danych
Zadanie 2
Michał Leśniak 195642
"""
from statistics import mean
def var(lst):
    """Return the population variance of the values in ``lst``.

    The squared deviations from the arithmetic mean are summed and divided
    by ``len(lst)`` (no Bessel correction), so ``lst`` must be a sized,
    re-iterable collection of numbers.
    """
    centre = mean(lst)
    squared_deviations = [(value - centre) ** 2 for value in lst]
    return sum(squared_deviations) / len(lst)
def cov(lst_x, lst_y):
    """Return the population covariance of two equally long samples.

    The products of the paired deviations from each sample's arithmetic
    mean are summed and divided by the number of pairs (``len(lst_x)``).

    Args:
        lst_x: Sized collection of numbers (first variable).
        lst_y: Sized collection of numbers (second variable), paired
            element-by-element with ``lst_x``.

    Returns:
        The covariance as a number.

    Raises:
        ValueError: If the two samples differ in length.
    """
    # Explicit check instead of ``assert`` so the validation is not
    # stripped when Python runs with -O.
    if len(lst_x) != len(lst_y):
        raise ValueError(
            f'samples must have equal length, got {len(lst_x)} and {len(lst_y)}')
    x_mean = mean(lst_x)
    y_mean = mean(lst_y)
    paired_products = (
        (x - x_mean) * (y - y_mean) for x, y in zip(lst_x, lst_y)
    )
    return sum(paired_products) / len(lst_x)
def load_data(*args):
    """Load numeric rows from one or more comma-separated text files.

    Each non-blank line of a file is split on ``,`` and every field is
    converted to ``float``; the line becomes one tuple of floats.

    Args:
        *args: Paths of the files to read, in the order the datasets
            should be returned.

    Returns:
        A tuple with one entry per path; each entry is a list of tuples
        of floats (one tuple per non-blank line).

    Raises:
        OSError: If a file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    datasets = []
    for path in args:
        with open(path, 'r') as f:
            lines = f.read().splitlines()
        rows = [
            tuple(float(field.strip()) for field in line.split(','))
            for line in lines
            # Blank / whitespace-only lines (e.g. a trailing empty line)
            # carry no data and would otherwise crash in float('').
            if line.strip()
        ]
        datasets.append(rows)
    return tuple(datasets)
def model1(data):
    """Print a linear model of the form ``f(X) = a * X`` for ``data``.

    ``data`` is iterated twice as ``(x, y)`` pairs.  The coefficient ``a``
    is computed as ``cov(x, y) / var(x)``.  Nothing is returned.

    NOTE(review): ``cov/var`` is the least-squares slope of a fit *with*
    an intercept; for a through-origin model the least-squares slope is
    ``sum(x*y) / sum(x*x)`` -- confirm which one is intended here.
    """
    xs = [px for px, _ in data]
    ys = [py for _, py in data]
    covariance = cov(xs, ys)
    variance = var(xs)
    a = covariance / variance
    print(f'f(X) = {a} * X')
def model2(data):
    """Print a linear model of the form ``f(X) = a * X + b`` for ``data``.

    ``data`` is iterated twice as ``(x, y)`` pairs.  The slope is
    ``a = cov(x, y) / var(x)`` and the intercept is
    ``b = mean(y) - a * mean(x)``.  Nothing is returned.
    """
    xs = [px for px, _ in data]
    ys = [py for _, py in data]
    covariance = cov(xs, ys)
    variance = var(xs)
    a = covariance / variance
    y_bar = mean(ys)
    x_bar = mean(xs)
    b = y_bar - a * x_bar
    print(f'f(X) = {a} * X + {b}')
def main():
    """Load the four data files and print statistics and fitted models.

    Reads ``data1.csv`` .. ``data4.csv`` from the current working
    directory, then prints, in order: the variance of the x column of
    dataset 1, the covariance of its x and y columns, ``model1`` for
    datasets 1 and 2, ``model2`` for datasets 1 and 2, and two further
    values derived from dataset 1 (see the comments below).
    """
    # All four files are still loaded (and must exist); datasets 3 and 4
    # are currently not used any further.
    data1, data2, _data3, _data4 = load_data(
        'data1.csv', 'data2.csv', 'data3.csv', 'data4.csv')

    # Extract the columns of dataset 1 once instead of per call.
    xs1 = [x for x, _ in data1]
    ys1 = [y for _, y in data1]
    print(var(xs1))
    print(cov(xs1, ys1))

    model1(data1)
    model1(data2)
    model2(data1)
    model2(data2)

    xy = sum(x * y for x, y in data1)
    x_2 = sum(x ** 2 for x, _ in data1)
    # NOTE(review): mean of (2*x - 2*x*y) over dataset 1; the intent of
    # this quantity is not evident from the code -- confirm.
    print(sum(2 * x - 2 * x * y for x, y in data1) / len(data1))
    # sum(x*y) / sum(x^2) is the least-squares slope of a line through
    # the origin.
    print(xy / x_2)
# Run the analysis only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()