This commit is contained in:
Michał Leśniak 2021-10-17 21:28:23 +02:00
commit 40f642966c
3 changed files with 372 additions and 0 deletions

15
zad1/README.txt Normal file
View File

@ -0,0 +1,15 @@
CHARAKTERYSTYKA GATUNKÓW IRYSÓW
Liczba klas (gatunków): 3
Liczba obserwacji: 150 (50 dla każdego gatunku)
Atrybuty (kolumny):
1. długość działki kielicha (ang. sepal length) [cm]
2. szerokość działki kielicha (ang. sepal width) [cm]
3. długość płatka (ang. petal length) [cm]
4. szerokość płatka (ang. petal width) [cm]
5. gatunek (ang. species):
0 - setosa
1 - versicolor
2 - virginica

150
zad1/data.csv Normal file
View File

@ -0,0 +1,150 @@
5.1,3.5,1.4,0.2,0
4.9,3.0,1.4,0.2,0
4.7,3.2,1.3,0.2,0
4.6,3.1,1.5,0.2,0
5.0,3.6,1.4,0.2,0
5.4,3.9,1.7,0.4,0
4.6,3.4,1.4,0.3,0
5.0,3.4,1.5,0.2,0
4.4,2.9,1.4,0.2,0
4.9,3.1,1.5,0.1,0
5.4,3.7,1.5,0.2,0
4.8,3.4,1.6,0.2,0
4.8,3.0,1.4,0.1,0
4.3,3.0,1.1,0.1,0
5.8,4.0,1.2,0.2,0
5.7,4.4,1.5,0.4,0
5.4,3.9,1.3,0.4,0
5.1,3.5,1.4,0.3,0
5.7,3.8,1.7,0.3,0
5.1,3.8,1.5,0.3,0
5.4,3.4,1.7,0.2,0
5.1,3.7,1.5,0.4,0
4.6,3.6,1.0,0.2,0
5.1,3.3,1.7,0.5,0
4.8,3.4,1.9,0.2,0
5.0,3.0,1.6,0.2,0
5.0,3.4,1.6,0.4,0
5.2,3.5,1.5,0.2,0
5.2,3.4,1.4,0.2,0
4.7,3.2,1.6,0.2,0
4.8,3.1,1.6,0.2,0
5.4,3.4,1.5,0.4,0
5.2,4.1,1.5,0.1,0
5.5,4.2,1.4,0.2,0
4.9,3.1,1.5,0.2,0
5.0,3.2,1.2,0.2,0
5.5,3.5,1.3,0.2,0
4.9,3.6,1.4,0.1,0
4.4,3.0,1.3,0.2,0
5.1,3.4,1.5,0.2,0
5.0,3.5,1.3,0.3,0
4.5,2.3,1.3,0.3,0
4.4,3.2,1.3,0.2,0
5.0,3.5,1.6,0.6,0
5.1,3.8,1.9,0.4,0
4.8,3.0,1.4,0.3,0
5.1,3.8,1.6,0.2,0
4.6,3.2,1.4,0.2,0
5.3,3.7,1.5,0.2,0
5.0,3.3,1.4,0.2,0
7.0,3.2,4.7,1.4,1
6.4,3.2,4.5,1.5,1
6.9,3.1,4.9,1.5,1
5.5,2.3,4.0,1.3,1
6.5,2.8,4.6,1.5,1
5.7,2.8,4.5,1.3,1
6.3,3.3,4.7,1.6,1
4.9,2.4,3.3,1.0,1
6.6,2.9,4.6,1.3,1
5.2,2.7,3.9,1.4,1
5.0,2.0,3.5,1.0,1
5.9,3.0,4.2,1.5,1
6.0,2.2,4.0,1.0,1
6.1,2.9,4.7,1.4,1
5.6,2.9,3.6,1.3,1
6.7,3.1,4.4,1.4,1
5.6,3.0,4.5,1.5,1
5.8,2.7,4.1,1.0,1
6.2,2.2,4.5,1.5,1
5.6,2.5,3.9,1.1,1
5.9,3.2,4.8,1.8,1
6.1,2.8,4.0,1.3,1
6.3,2.5,4.9,1.5,1
6.1,2.8,4.7,1.2,1
6.4,2.9,4.3,1.3,1
6.6,3.0,4.4,1.4,1
6.8,2.8,4.8,1.4,1
6.7,3.0,5.0,1.7,1
6.0,2.9,4.5,1.5,1
5.7,2.6,3.5,1.0,1
5.5,2.4,3.8,1.1,1
5.5,2.4,3.7,1.0,1
5.8,2.7,3.9,1.2,1
6.0,2.7,5.1,1.6,1
5.4,3.0,4.5,1.5,1
6.0,3.4,4.5,1.6,1
6.7,3.1,4.7,1.5,1
6.3,2.3,4.4,1.3,1
5.6,3.0,4.1,1.3,1
5.5,2.5,4.0,1.3,1
5.5,2.6,4.4,1.2,1
6.1,3.0,4.6,1.4,1
5.8,2.6,4.0,1.2,1
5.0,2.3,3.3,1.0,1
5.6,2.7,4.2,1.3,1
5.7,3.0,4.2,1.2,1
5.7,2.9,4.2,1.3,1
6.2,2.9,4.3,1.3,1
5.1,2.5,3.0,1.1,1
5.7,2.8,4.1,1.3,1
6.3,3.3,6.0,2.5,2
5.8,2.7,5.1,1.9,2
7.1,3.0,5.9,2.1,2
6.3,2.9,5.6,1.8,2
6.5,3.0,5.8,2.2,2
7.6,3.0,6.6,2.1,2
4.9,2.5,4.5,1.7,2
7.3,2.9,6.3,1.8,2
6.7,2.5,5.8,1.8,2
7.2,3.6,6.1,2.5,2
6.5,3.2,5.1,2.0,2
6.4,2.7,5.3,1.9,2
6.8,3.0,5.5,2.1,2
5.7,2.5,5.0,2.0,2
5.8,2.8,5.1,2.4,2
6.4,3.2,5.3,2.3,2
6.5,3.0,5.5,1.8,2
7.7,3.8,6.7,2.2,2
7.7,2.6,6.9,2.3,2
6.0,2.2,5.0,1.5,2
6.9,3.2,5.7,2.3,2
5.6,2.8,4.9,2.0,2
7.7,2.8,6.7,2.0,2
6.3,2.7,4.9,1.8,2
6.7,3.3,5.7,2.1,2
7.2,3.2,6.0,1.8,2
6.2,2.8,4.8,1.8,2
6.1,3.0,4.9,1.8,2
6.4,2.8,5.6,2.1,2
7.2,3.0,5.8,1.6,2
7.4,2.8,6.1,1.9,2
7.9,3.8,6.4,2.0,2
6.4,2.8,5.6,2.2,2
6.3,2.8,5.1,1.5,2
6.1,2.6,5.6,1.4,2
7.7,3.0,6.1,2.3,2
6.3,3.4,5.6,2.4,2
6.4,3.1,5.5,1.8,2
6.0,3.0,4.8,1.8,2
6.9,3.1,5.4,2.1,2
6.7,3.1,5.6,2.4,2
6.9,3.1,5.1,2.3,2
5.8,2.7,5.1,1.9,2
6.8,3.2,5.9,2.3,2
6.7,3.3,5.7,2.5,2
6.7,3.0,5.2,2.3,2
6.3,2.5,5.0,1.9,2
6.5,3.0,5.2,2.0,2
6.2,3.4,5.4,2.3,2
5.9,3.0,5.1,1.8,2
1 5.1 3.5 1.4 0.2 0
2 4.9 3.0 1.4 0.2 0
3 4.7 3.2 1.3 0.2 0
4 4.6 3.1 1.5 0.2 0
5 5.0 3.6 1.4 0.2 0
6 5.4 3.9 1.7 0.4 0
7 4.6 3.4 1.4 0.3 0
8 5.0 3.4 1.5 0.2 0
9 4.4 2.9 1.4 0.2 0
10 4.9 3.1 1.5 0.1 0
11 5.4 3.7 1.5 0.2 0
12 4.8 3.4 1.6 0.2 0
13 4.8 3.0 1.4 0.1 0
14 4.3 3.0 1.1 0.1 0
15 5.8 4.0 1.2 0.2 0
16 5.7 4.4 1.5 0.4 0
17 5.4 3.9 1.3 0.4 0
18 5.1 3.5 1.4 0.3 0
19 5.7 3.8 1.7 0.3 0
20 5.1 3.8 1.5 0.3 0
21 5.4 3.4 1.7 0.2 0
22 5.1 3.7 1.5 0.4 0
23 4.6 3.6 1.0 0.2 0
24 5.1 3.3 1.7 0.5 0
25 4.8 3.4 1.9 0.2 0
26 5.0 3.0 1.6 0.2 0
27 5.0 3.4 1.6 0.4 0
28 5.2 3.5 1.5 0.2 0
29 5.2 3.4 1.4 0.2 0
30 4.7 3.2 1.6 0.2 0
31 4.8 3.1 1.6 0.2 0
32 5.4 3.4 1.5 0.4 0
33 5.2 4.1 1.5 0.1 0
34 5.5 4.2 1.4 0.2 0
35 4.9 3.1 1.5 0.2 0
36 5.0 3.2 1.2 0.2 0
37 5.5 3.5 1.3 0.2 0
38 4.9 3.6 1.4 0.1 0
39 4.4 3.0 1.3 0.2 0
40 5.1 3.4 1.5 0.2 0
41 5.0 3.5 1.3 0.3 0
42 4.5 2.3 1.3 0.3 0
43 4.4 3.2 1.3 0.2 0
44 5.0 3.5 1.6 0.6 0
45 5.1 3.8 1.9 0.4 0
46 4.8 3.0 1.4 0.3 0
47 5.1 3.8 1.6 0.2 0
48 4.6 3.2 1.4 0.2 0
49 5.3 3.7 1.5 0.2 0
50 5.0 3.3 1.4 0.2 0
51 7.0 3.2 4.7 1.4 1
52 6.4 3.2 4.5 1.5 1
53 6.9 3.1 4.9 1.5 1
54 5.5 2.3 4.0 1.3 1
55 6.5 2.8 4.6 1.5 1
56 5.7 2.8 4.5 1.3 1
57 6.3 3.3 4.7 1.6 1
58 4.9 2.4 3.3 1.0 1
59 6.6 2.9 4.6 1.3 1
60 5.2 2.7 3.9 1.4 1
61 5.0 2.0 3.5 1.0 1
62 5.9 3.0 4.2 1.5 1
63 6.0 2.2 4.0 1.0 1
64 6.1 2.9 4.7 1.4 1
65 5.6 2.9 3.6 1.3 1
66 6.7 3.1 4.4 1.4 1
67 5.6 3.0 4.5 1.5 1
68 5.8 2.7 4.1 1.0 1
69 6.2 2.2 4.5 1.5 1
70 5.6 2.5 3.9 1.1 1
71 5.9 3.2 4.8 1.8 1
72 6.1 2.8 4.0 1.3 1
73 6.3 2.5 4.9 1.5 1
74 6.1 2.8 4.7 1.2 1
75 6.4 2.9 4.3 1.3 1
76 6.6 3.0 4.4 1.4 1
77 6.8 2.8 4.8 1.4 1
78 6.7 3.0 5.0 1.7 1
79 6.0 2.9 4.5 1.5 1
80 5.7 2.6 3.5 1.0 1
81 5.5 2.4 3.8 1.1 1
82 5.5 2.4 3.7 1.0 1
83 5.8 2.7 3.9 1.2 1
84 6.0 2.7 5.1 1.6 1
85 5.4 3.0 4.5 1.5 1
86 6.0 3.4 4.5 1.6 1
87 6.7 3.1 4.7 1.5 1
88 6.3 2.3 4.4 1.3 1
89 5.6 3.0 4.1 1.3 1
90 5.5 2.5 4.0 1.3 1
91 5.5 2.6 4.4 1.2 1
92 6.1 3.0 4.6 1.4 1
93 5.8 2.6 4.0 1.2 1
94 5.0 2.3 3.3 1.0 1
95 5.6 2.7 4.2 1.3 1
96 5.7 3.0 4.2 1.2 1
97 5.7 2.9 4.2 1.3 1
98 6.2 2.9 4.3 1.3 1
99 5.1 2.5 3.0 1.1 1
100 5.7 2.8 4.1 1.3 1
101 6.3 3.3 6.0 2.5 2
102 5.8 2.7 5.1 1.9 2
103 7.1 3.0 5.9 2.1 2
104 6.3 2.9 5.6 1.8 2
105 6.5 3.0 5.8 2.2 2
106 7.6 3.0 6.6 2.1 2
107 4.9 2.5 4.5 1.7 2
108 7.3 2.9 6.3 1.8 2
109 6.7 2.5 5.8 1.8 2
110 7.2 3.6 6.1 2.5 2
111 6.5 3.2 5.1 2.0 2
112 6.4 2.7 5.3 1.9 2
113 6.8 3.0 5.5 2.1 2
114 5.7 2.5 5.0 2.0 2
115 5.8 2.8 5.1 2.4 2
116 6.4 3.2 5.3 2.3 2
117 6.5 3.0 5.5 1.8 2
118 7.7 3.8 6.7 2.2 2
119 7.7 2.6 6.9 2.3 2
120 6.0 2.2 5.0 1.5 2
121 6.9 3.2 5.7 2.3 2
122 5.6 2.8 4.9 2.0 2
123 7.7 2.8 6.7 2.0 2
124 6.3 2.7 4.9 1.8 2
125 6.7 3.3 5.7 2.1 2
126 7.2 3.2 6.0 1.8 2
127 6.2 2.8 4.8 1.8 2
128 6.1 3.0 4.9 1.8 2
129 6.4 2.8 5.6 2.1 2
130 7.2 3.0 5.8 1.6 2
131 7.4 2.8 6.1 1.9 2
132 7.9 3.8 6.4 2.0 2
133 6.4 2.8 5.6 2.2 2
134 6.3 2.8 5.1 1.5 2
135 6.1 2.6 5.6 1.4 2
136 7.7 3.0 6.1 2.3 2
137 6.3 3.4 5.6 2.4 2
138 6.4 3.1 5.5 1.8 2
139 6.0 3.0 4.8 1.8 2
140 6.9 3.1 5.4 2.1 2
141 6.7 3.1 5.6 2.4 2
142 6.9 3.1 5.1 2.3 2
143 5.8 2.7 5.1 1.9 2
144 6.8 3.2 5.9 2.3 2
145 6.7 3.3 5.7 2.5 2
146 6.7 3.0 5.2 2.3 2
147 6.3 2.5 5.0 1.9 2
148 6.5 3.0 5.2 2.0 2
149 6.2 3.4 5.4 2.3 2
150 5.9 3.0 5.1 1.8 2

207
zad1/main.py Normal file
View File

@ -0,0 +1,207 @@
"""
Komputerowa analiza danych
Zadanie 1
Michał Leśniak 195642
"""
import math
import locale
# Switch every locale category to the user's default locale (the empty string
# selects it), so that the locale.format_string() calls in this module format
# numbers according to that locale's conventions.
locale.setlocale(locale.LC_ALL, '')
# Maps the species code, kept as the string read from the last CSV field,
# to the species name shown in the printed summary.
SPECIES_NAMES = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'virginica'
}
# Row labels (in Polish) for the four measured traits.  print_data() looks
# them up by position, so the order must match the order in which main()
# collects the CSV columns: sepal length, sepal width, petal length,
# petal width.
ATTRIBUTE_NAMES = ['Długość działki kielicha [cm]',
                   'Szerokość działki kielicha [cm]',
                   'Długość płatka [cm]',
                   'Szerokość płatka [cm]']
def mean(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    The total is accumulated with ``math.fsum`` so that rounding errors of
    the individual additions do not pile up.

    Args:
        lst: non-empty sized collection of numbers.

    Returns:
        The mean as a float.

    Raises:
        ValueError: if ``lst`` is empty.
    """
    if not lst:
        raise ValueError("mean() requires at least one value")
    return math.fsum(lst) / len(lst)
def median(lst, is_even=None):
    """Return the median of an ascending-sorted sequence.

    Args:
        lst: non-empty sequence that is already sorted in ascending order;
            no sorting is done here.
        is_even: whether to treat ``lst`` as having an even number of
            elements.  When omitted (``None``) it is derived from
            ``len(lst)``, which avoids passing a flag that does not match
            the sequence.  An explicitly passed value is honoured as is.

    Returns:
        The middle element, or the average of the two middle elements when
        the sequence is treated as even-sized.

    Raises:
        IndexError: if ``lst`` is empty.
    """
    if is_even is None:
        is_even = len(lst) % 2 == 0
    middle = len(lst) // 2
    if is_even:
        return (lst[middle - 1] + lst[middle]) / 2
    return lst[middle]
def sample_standard_deviation(lst, lst_mean):
    """Return the sample standard deviation (n - 1 in the denominator).

    Args:
        lst: sized collection of at least two numbers.
        lst_mean: arithmetic mean of ``lst``, computed by the caller.

    Returns:
        The square root of the sum of squared deviations from ``lst_mean``
        divided by ``len(lst) - 1``.

    Raises:
        ValueError: if ``lst`` has fewer than two elements, for which the
            sample standard deviation is undefined.
    """
    count = len(lst)
    if count < 2:
        raise ValueError(
            "sample_standard_deviation() requires at least two values")
    # Generator instead of a temporary list; fsum keeps the float sum exact.
    squared_deviations = math.fsum((value - lst_mean) ** 2 for value in lst)
    return math.sqrt(squared_deviations / (count - 1))
def q1(lst, is_even, lst_median):
    """Return the first quartile of an ascending-sorted sequence.

    The quartile is the median of the lower part of the data:

    * odd-sized input: the elements up to and including the middle one;
    * even-sized input: the elements before the two middle ones, followed
      by ``lst_median``.

    Args:
        lst: sequence sorted in ascending order; it is not modified.
        is_even: whether ``lst`` has an even number of elements.
        lst_median: median of ``lst``, computed by the caller.

    Returns:
        The median of the lower part described above.
    """
    half = len(lst) // 2
    if is_even:
        lower = list(lst[:half - 1])
        lower.append(lst_median)
    else:
        lower = list(lst[:half + 1])

    # The parity that matters here is that of the lower part itself; the
    # parity of the full list (is_even) says nothing about it.
    middle = len(lower) // 2
    if len(lower) % 2 == 0:
        return (lower[middle - 1] + lower[middle]) / 2
    return lower[middle]
def q3(lst, is_even, lst_median):
    """Return the third quartile of an ascending-sorted sequence.

    The quartile is the median of the upper part of the data:

    * odd-sized input: the elements from the middle one onwards;
    * even-sized input: ``lst_median`` followed by the elements after the
      two middle ones.

    Args:
        lst: sequence sorted in ascending order; it is not modified.
        is_even: whether ``lst`` has an even number of elements.
        lst_median: median of ``lst``, computed by the caller.

    Returns:
        The median of the upper part described above.
    """
    half = len(lst) // 2
    if is_even:
        upper = [lst_median]
        upper.extend(lst[half + 1:])
    else:
        upper = list(lst[half:])

    # The parity that matters here is that of the upper part itself; the
    # parity of the full list (is_even) says nothing about it.
    middle = len(upper) // 2
    if len(upper) % 2 == 0:
        return (upper[middle - 1] + upper[middle]) / 2
    return upper[middle]
def min(lst):
    """Return the first element of ``lst``.

    No search is performed, so this is the minimum only when the caller
    passes a sequence that is already sorted in ascending order.
    """
    first_position = 0
    return lst[first_position]
def max(lst):
    """Return the last element of ``lst``.

    No search is performed, so this is the maximum only when the caller
    passes a sequence that is already sorted in ascending order.
    """
    last_position = -1
    return lst[last_position]
def calc_data(lst):
    """Compute the summary statistics of one ascending-sorted list.

    Returns:
        A 7-tuple ``(minimum, mean, sample standard deviation, median,
        first quartile, third quartile, maximum)``.
    """
    even_sized = len(lst) % 2 == 0
    average = mean(lst)
    middle = median(lst, even_sized)

    smallest = min(lst)
    deviation = sample_standard_deviation(lst, average)
    lower_quartile = q1(lst, even_sized, middle)
    upper_quartile = q3(lst, even_sized, middle)
    largest = max(lst)

    return (smallest, average, deviation, middle,
            lower_quartile, upper_quartile, largest)
def calc_species_data(species):
    """Turn per-species counts into rows for the summary table.

    Args:
        species: dict mapping a species code (a key of SPECIES_NAMES) to
            the number of observations of that species.

    Returns:
        A list of ``(name, count, share)`` tuples, one per species in the
        dict's iteration order, followed by a closing totals row labelled
        "Razem" whose share is 1.0.
    """
    total = sum(species.values())
    rows = [
        (SPECIES_NAMES[code], count, count / total)
        for code, count in species.items()
    ]
    rows.append(("Razem", total, 1.0))
    return rows
def percentage_format(x):
    """Format the fraction ``x`` as a percentage with one decimal place.

    The number is rendered with ``locale.format_string`` and therefore
    follows the currently active locale.
    """
    as_percent = x * 100
    return locale.format_string('%.1f%%', as_percent)
def float_format(x):
    """Format ``x`` with two decimal places.

    The number is rendered with ``locale.format_string`` and therefore
    follows the currently active locale.
    """
    two_decimals = '%.2f'
    return locale.format_string(two_decimals, x)
def print_table(table):
    """Print ``table`` to standard output as a fixed-width text table.

    Layout: a rule of ``=`` characters, the header row (every cell centred),
    another ``=`` rule, the data rows (first cell left-aligned, remaining
    cells centred) and a closing ``=`` rule.  Cells are joined with " | ".
    Every rule is one character longer than the widest printed row.

    Args:
        table: sequence of rows.  The first row is the header; every other
            row is either a sequence of strings with as many cells as the
            header, or a falsy value (``None``), which is printed as a
            separator line of ``-`` characters.  An empty table prints
            nothing.
    """
    if not table:
        return

    header = table[0]

    # Width of each column = longest cell in it; separator rows have no cells.
    cell_sizes = [
        max(len(row[column]) for row in table if row)
        for column in range(len(header))
    ]

    centered = [f'{{:^{size}}}' for size in cell_sizes]
    left_aligned = [f'{{:<{size}}}' for size in cell_sizes[:1]]
    header_row_format = ' | '.join(centered)
    # Joining (rather than concatenating a fixed "... | " prefix) keeps a
    # single-column table free of a dangling cell delimiter.
    row_format = ' | '.join(left_aligned + centered[1:])

    # The header is identified by its position, not by comparing values, so
    # a data row that happens to equal the header is still formatted as data.
    lines = [header_row_format.format(*header)]
    for row in table[1:]:
        # None marks the place of a separator until the rule width is known.
        lines.append(row_format.format(*row) if row else None)

    row_size = max(len(line) for line in lines if line is not None) + 1
    rule = '=' * row_size
    separator = '-' * row_size

    output = [rule, lines[0], rule]
    output.extend(separator if line is None else line for line in lines[1:])
    output.append(rule)
    for line in output:
        print(line)
def print_data(species_data, trait_data):
    """Print the species summary table and the trait statistics table.

    Args:
        species_data: list of ``(name, count, share)`` rows; the last row
            is the totals row and is preceded by a separator line.
        trait_data: list of 7-tuples ``(minimum, mean, standard deviation,
            median, first quartile, third quartile, maximum)``, one per
            entry of ATTRIBUTE_NAMES and in the same order.
    """
    species_printable_table = [("Gatunek", "Liczebność (%)")]
    last_index = len(species_data) - 1
    for index, (name, count, share) in enumerate(species_data):
        if index == last_index:
            # A None row is rendered by print_table() as a separator line.
            species_printable_table.append(None)
        species_printable_table.append(
            (name, "{} ({})".format(count, percentage_format(share))))

    trait_printable_table = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)",
         "Mediana (Q1 - Q3)", "Maksimum")]
    for index, stats in enumerate(trait_data):
        (minimum, average, deviation, middle,
         lower_quartile, upper_quartile, maximum) = stats
        trait_printable_table.append((
            ATTRIBUTE_NAMES[index],
            float_format(minimum),
            # Matches the column header: "mean (± standard deviation)".
            "{} (± {})".format(float_format(average),
                               float_format(deviation)),
            "{} ({} - {})".format(float_format(middle),
                                  float_format(lower_quartile),
                                  float_format(upper_quartile)),
            float_format(maximum),
        ))

    print_table(species_printable_table)
    print()
    print_table(trait_printable_table)
def main():
    """Read ``data.csv``, compute the statistics and print both tables.

    Each non-blank line of the file must hold five comma-separated fields:
    four numeric measurements followed by a species code that is one of the
    keys of ``species_dict`` below.  The file is looked up relative to the
    current working directory.

    Raises:
        ValueError: if a line does not have exactly five fields or a
            measurement is not a number.
        KeyError: if a species code is not '0', '1' or '2'.
    """
    # One list per numeric column, in file order:
    # sepal length, sepal width, petal length, petal width.
    trait_lists = ([], [], [], [])
    species_dict = {
        '0': 0,
        '1': 0,
        '2': 0,
    }

    # read and parse data
    with open("data.csv", "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            sepal_length, sepal_width, petal_length, petal_width, species = \
                line.split(',')
            measurements = (sepal_length, sepal_width,
                            petal_length, petal_width)
            for values, raw in zip(trait_lists, measurements):
                values.append(float(raw))
            species_dict[species.strip()] += 1

    # The statistics helpers expect ascending-sorted input.
    for values in trait_lists:
        values.sort()

    # calculate results
    species_data = calc_species_data(species_dict)
    trait_data = [calc_data(values) for values in trait_lists]

    # print results
    print_data(species_data, trait_data)
if __name__ == "__main__":
    import os.path
    import sys

    # Explicit check instead of an assert: assert statements are removed
    # when Python runs with -O, which would silently skip the validation.
    if not os.path.isfile('data.csv'):
        sys.exit("data.csv not found in the current working directory")
    main()