zad 1
This commit is contained in:
commit
40f642966c
15
zad1/README.txt
Normal file
15
zad1/README.txt
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
CHARAKTERYSTYKA GATUNKÓW IRYSÓW
|
||||||
|
|
||||||
|
Liczba klas (gatunków): 3
|
||||||
|
|
||||||
|
Liczba obserwacji: 150 (50 dla każdego gatunku)
|
||||||
|
|
||||||
|
Atrybuty (kolumny):
|
||||||
|
1. długość działki kielicha (ang. sepal length) [cm]
|
||||||
|
2. szerokość działki kielicha (ang. sepal width) [cm]
|
||||||
|
3. długość płatka (ang. petal length) [cm]
|
||||||
|
4. szerokość płatka (ang. petal width) [cm]
|
||||||
|
5. gatunek (ang. species):
|
||||||
|
0 - setosa
|
||||||
|
1 - versicolor
|
||||||
|
2 - virginica
|
150
zad1/data.csv
Normal file
150
zad1/data.csv
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
5.1,3.5,1.4,0.2,0
|
||||||
|
4.9,3.0,1.4,0.2,0
|
||||||
|
4.7,3.2,1.3,0.2,0
|
||||||
|
4.6,3.1,1.5,0.2,0
|
||||||
|
5.0,3.6,1.4,0.2,0
|
||||||
|
5.4,3.9,1.7,0.4,0
|
||||||
|
4.6,3.4,1.4,0.3,0
|
||||||
|
5.0,3.4,1.5,0.2,0
|
||||||
|
4.4,2.9,1.4,0.2,0
|
||||||
|
4.9,3.1,1.5,0.1,0
|
||||||
|
5.4,3.7,1.5,0.2,0
|
||||||
|
4.8,3.4,1.6,0.2,0
|
||||||
|
4.8,3.0,1.4,0.1,0
|
||||||
|
4.3,3.0,1.1,0.1,0
|
||||||
|
5.8,4.0,1.2,0.2,0
|
||||||
|
5.7,4.4,1.5,0.4,0
|
||||||
|
5.4,3.9,1.3,0.4,0
|
||||||
|
5.1,3.5,1.4,0.3,0
|
||||||
|
5.7,3.8,1.7,0.3,0
|
||||||
|
5.1,3.8,1.5,0.3,0
|
||||||
|
5.4,3.4,1.7,0.2,0
|
||||||
|
5.1,3.7,1.5,0.4,0
|
||||||
|
4.6,3.6,1.0,0.2,0
|
||||||
|
5.1,3.3,1.7,0.5,0
|
||||||
|
4.8,3.4,1.9,0.2,0
|
||||||
|
5.0,3.0,1.6,0.2,0
|
||||||
|
5.0,3.4,1.6,0.4,0
|
||||||
|
5.2,3.5,1.5,0.2,0
|
||||||
|
5.2,3.4,1.4,0.2,0
|
||||||
|
4.7,3.2,1.6,0.2,0
|
||||||
|
4.8,3.1,1.6,0.2,0
|
||||||
|
5.4,3.4,1.5,0.4,0
|
||||||
|
5.2,4.1,1.5,0.1,0
|
||||||
|
5.5,4.2,1.4,0.2,0
|
||||||
|
4.9,3.1,1.5,0.2,0
|
||||||
|
5.0,3.2,1.2,0.2,0
|
||||||
|
5.5,3.5,1.3,0.2,0
|
||||||
|
4.9,3.6,1.4,0.1,0
|
||||||
|
4.4,3.0,1.3,0.2,0
|
||||||
|
5.1,3.4,1.5,0.2,0
|
||||||
|
5.0,3.5,1.3,0.3,0
|
||||||
|
4.5,2.3,1.3,0.3,0
|
||||||
|
4.4,3.2,1.3,0.2,0
|
||||||
|
5.0,3.5,1.6,0.6,0
|
||||||
|
5.1,3.8,1.9,0.4,0
|
||||||
|
4.8,3.0,1.4,0.3,0
|
||||||
|
5.1,3.8,1.6,0.2,0
|
||||||
|
4.6,3.2,1.4,0.2,0
|
||||||
|
5.3,3.7,1.5,0.2,0
|
||||||
|
5.0,3.3,1.4,0.2,0
|
||||||
|
7.0,3.2,4.7,1.4,1
|
||||||
|
6.4,3.2,4.5,1.5,1
|
||||||
|
6.9,3.1,4.9,1.5,1
|
||||||
|
5.5,2.3,4.0,1.3,1
|
||||||
|
6.5,2.8,4.6,1.5,1
|
||||||
|
5.7,2.8,4.5,1.3,1
|
||||||
|
6.3,3.3,4.7,1.6,1
|
||||||
|
4.9,2.4,3.3,1.0,1
|
||||||
|
6.6,2.9,4.6,1.3,1
|
||||||
|
5.2,2.7,3.9,1.4,1
|
||||||
|
5.0,2.0,3.5,1.0,1
|
||||||
|
5.9,3.0,4.2,1.5,1
|
||||||
|
6.0,2.2,4.0,1.0,1
|
||||||
|
6.1,2.9,4.7,1.4,1
|
||||||
|
5.6,2.9,3.6,1.3,1
|
||||||
|
6.7,3.1,4.4,1.4,1
|
||||||
|
5.6,3.0,4.5,1.5,1
|
||||||
|
5.8,2.7,4.1,1.0,1
|
||||||
|
6.2,2.2,4.5,1.5,1
|
||||||
|
5.6,2.5,3.9,1.1,1
|
||||||
|
5.9,3.2,4.8,1.8,1
|
||||||
|
6.1,2.8,4.0,1.3,1
|
||||||
|
6.3,2.5,4.9,1.5,1
|
||||||
|
6.1,2.8,4.7,1.2,1
|
||||||
|
6.4,2.9,4.3,1.3,1
|
||||||
|
6.6,3.0,4.4,1.4,1
|
||||||
|
6.8,2.8,4.8,1.4,1
|
||||||
|
6.7,3.0,5.0,1.7,1
|
||||||
|
6.0,2.9,4.5,1.5,1
|
||||||
|
5.7,2.6,3.5,1.0,1
|
||||||
|
5.5,2.4,3.8,1.1,1
|
||||||
|
5.5,2.4,3.7,1.0,1
|
||||||
|
5.8,2.7,3.9,1.2,1
|
||||||
|
6.0,2.7,5.1,1.6,1
|
||||||
|
5.4,3.0,4.5,1.5,1
|
||||||
|
6.0,3.4,4.5,1.6,1
|
||||||
|
6.7,3.1,4.7,1.5,1
|
||||||
|
6.3,2.3,4.4,1.3,1
|
||||||
|
5.6,3.0,4.1,1.3,1
|
||||||
|
5.5,2.5,4.0,1.3,1
|
||||||
|
5.5,2.6,4.4,1.2,1
|
||||||
|
6.1,3.0,4.6,1.4,1
|
||||||
|
5.8,2.6,4.0,1.2,1
|
||||||
|
5.0,2.3,3.3,1.0,1
|
||||||
|
5.6,2.7,4.2,1.3,1
|
||||||
|
5.7,3.0,4.2,1.2,1
|
||||||
|
5.7,2.9,4.2,1.3,1
|
||||||
|
6.2,2.9,4.3,1.3,1
|
||||||
|
5.1,2.5,3.0,1.1,1
|
||||||
|
5.7,2.8,4.1,1.3,1
|
||||||
|
6.3,3.3,6.0,2.5,2
|
||||||
|
5.8,2.7,5.1,1.9,2
|
||||||
|
7.1,3.0,5.9,2.1,2
|
||||||
|
6.3,2.9,5.6,1.8,2
|
||||||
|
6.5,3.0,5.8,2.2,2
|
||||||
|
7.6,3.0,6.6,2.1,2
|
||||||
|
4.9,2.5,4.5,1.7,2
|
||||||
|
7.3,2.9,6.3,1.8,2
|
||||||
|
6.7,2.5,5.8,1.8,2
|
||||||
|
7.2,3.6,6.1,2.5,2
|
||||||
|
6.5,3.2,5.1,2.0,2
|
||||||
|
6.4,2.7,5.3,1.9,2
|
||||||
|
6.8,3.0,5.5,2.1,2
|
||||||
|
5.7,2.5,5.0,2.0,2
|
||||||
|
5.8,2.8,5.1,2.4,2
|
||||||
|
6.4,3.2,5.3,2.3,2
|
||||||
|
6.5,3.0,5.5,1.8,2
|
||||||
|
7.7,3.8,6.7,2.2,2
|
||||||
|
7.7,2.6,6.9,2.3,2
|
||||||
|
6.0,2.2,5.0,1.5,2
|
||||||
|
6.9,3.2,5.7,2.3,2
|
||||||
|
5.6,2.8,4.9,2.0,2
|
||||||
|
7.7,2.8,6.7,2.0,2
|
||||||
|
6.3,2.7,4.9,1.8,2
|
||||||
|
6.7,3.3,5.7,2.1,2
|
||||||
|
7.2,3.2,6.0,1.8,2
|
||||||
|
6.2,2.8,4.8,1.8,2
|
||||||
|
6.1,3.0,4.9,1.8,2
|
||||||
|
6.4,2.8,5.6,2.1,2
|
||||||
|
7.2,3.0,5.8,1.6,2
|
||||||
|
7.4,2.8,6.1,1.9,2
|
||||||
|
7.9,3.8,6.4,2.0,2
|
||||||
|
6.4,2.8,5.6,2.2,2
|
||||||
|
6.3,2.8,5.1,1.5,2
|
||||||
|
6.1,2.6,5.6,1.4,2
|
||||||
|
7.7,3.0,6.1,2.3,2
|
||||||
|
6.3,3.4,5.6,2.4,2
|
||||||
|
6.4,3.1,5.5,1.8,2
|
||||||
|
6.0,3.0,4.8,1.8,2
|
||||||
|
6.9,3.1,5.4,2.1,2
|
||||||
|
6.7,3.1,5.6,2.4,2
|
||||||
|
6.9,3.1,5.1,2.3,2
|
||||||
|
5.8,2.7,5.1,1.9,2
|
||||||
|
6.8,3.2,5.9,2.3,2
|
||||||
|
6.7,3.3,5.7,2.5,2
|
||||||
|
6.7,3.0,5.2,2.3,2
|
||||||
|
6.3,2.5,5.0,1.9,2
|
||||||
|
6.5,3.0,5.2,2.0,2
|
||||||
|
6.2,3.4,5.4,2.3,2
|
||||||
|
5.9,3.0,5.1,1.8,2
|
|
207
zad1/main.py
Normal file
207
zad1/main.py
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Komputerowa analiza danych
|
||||||
|
Zadanie 1
|
||||||
|
Michał Leśniak 195642
|
||||||
|
"""
|
||||||
|
import math
|
||||||
|
import locale
|
||||||
|
# Adopt the locale configured in the user's environment so that
# locale.format_string() prints numbers with the local decimal separator.
# An environment that names a locale which is not installed makes
# setlocale() raise locale.Error; fall back to the portable "C" locale
# instead of failing at import time.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    locale.setlocale(locale.LC_ALL, 'C')
|
||||||
|
|
||||||
|
# Maps the species code (kept as a string, exactly as it is read from the
# data file) to the species name shown in the report.
SPECIES_NAMES = {
    str(code): name
    for code, name in enumerate(('setosa', 'versicolor', 'virginica'))
}
|
||||||
|
|
||||||
|
# Row labels of the trait table, one per measured attribute, indexed by the
# position of the attribute in the computed results.
ATTRIBUTE_NAMES = [
    'Długość działki kielicha [cm]',
    'Szerokość działki kielicha [cm]',
    'Długość płatka [cm]',
    'Szerokość płatka [cm]',
]
|
||||||
|
|
||||||
|
|
||||||
|
def mean(lst):
    """Return the arithmetic mean of the values in *lst*.

    The values are added from left to right, starting from ``0``, and the
    total is divided by ``len(lst)``. An empty *lst* therefore raises
    ``ZeroDivisionError``.
    """
    total = 0
    for value in lst:
        total += value
    return total / len(lst)
|
||||||
|
|
||||||
|
|
||||||
|
def median(lst, is_even):
    """Return the median of *lst*, which must already be sorted.

    When *is_even* is true the result is the average of the two elements
    around the middle of *lst*; otherwise it is the element at index
    ``len(lst) // 2``. The flag is taken as given and is not checked
    against the actual length of *lst*.
    """
    middle = len(lst) // 2
    if is_even:
        return (lst[middle - 1] + lst[middle]) / 2
    return lst[middle]
|
||||||
|
|
||||||
|
|
||||||
|
def sample_standard_deviation(lst, lst_mean):
    """Return the sample standard deviation of the values in *lst*.

    *lst_mean* is the precomputed mean of *lst*. The squared deviations from
    it are summed and divided by ``len(lst) - 1`` before taking the square
    root, so a one-element *lst* raises ``ZeroDivisionError``.
    """
    squared_deviations = 0
    for value in lst:
        squared_deviations += (value - lst_mean) ** 2
    return math.sqrt(squared_deviations / (len(lst) - 1))
|
||||||
|
|
||||||
|
|
||||||
|
def q1(lst, is_even, lst_median):
    """Return the first quartile of the sorted list *lst*.

    The quartile is the median of the lower half of the data, which is
    built as follows:

    * odd number of values (*is_even* false): every element up to and
      including the middle one;
    * even number of values (*is_even* true): the elements below the two
      middle ones, followed by *lst_median* (the median of the whole list).

    *lst* itself is not modified; the half is a new list.
    """
    if is_even:
        lower_half = lst[:len(lst) // 2 - 1]
        lower_half.append(lst_median)
    else:
        lower_half = lst[:len(lst) // 2 + 1]

    # The parity must come from the half itself: it generally differs from
    # the parity of the whole list (150 values give a 75-element half).
    return median(lower_half, len(lower_half) % 2 == 0)
|
||||||
|
|
||||||
|
|
||||||
|
def q3(lst, is_even, lst_median):
    """Return the third quartile of the sorted list *lst*.

    The quartile is the median of the upper half of the data, which is
    built as follows:

    * odd number of values (*is_even* false): the middle element and
      everything above it;
    * even number of values (*is_even* true): *lst_median* (the median of
      the whole list) followed by the elements above the two middle ones.

    *lst* itself is not modified; the half is a new list.
    """
    if is_even:
        upper_half = lst[len(lst) // 2 + 1:]
        upper_half.insert(0, lst_median)
    else:
        upper_half = lst[len(lst) // 2:]

    # The parity must come from the half itself: it generally differs from
    # the parity of the whole list (150 values give a 75-element half).
    return median(upper_half, len(upper_half) % 2 == 0)
|
||||||
|
|
||||||
|
|
||||||
|
def min(lst):
    """Return the first element of *lst*.

    This is the minimum only when *lst* is sorted in ascending order; the
    list is not scanned. Note that this definition replaces the built-in
    ``min`` for the rest of the module.
    """
    smallest = lst[0]
    return smallest
|
||||||
|
|
||||||
|
|
||||||
|
def max(lst):
    """Return the last element of *lst*.

    This is the maximum only when *lst* is sorted in ascending order; the
    list is not scanned. Note that this definition replaces the built-in
    ``max`` for the rest of the module.
    """
    largest = lst[-1]
    return largest
|
||||||
|
|
||||||
|
|
||||||
|
def calc_data(lst):
    """Return the summary statistics of the sorted list *lst*.

    The result is a 7-tuple:
    ``(minimum, mean, sample standard deviation, median, Q1, Q3, maximum)``.
    The mean and the median are computed once and passed on to the helpers
    that need them.
    """
    is_even = not len(lst) % 2
    lst_mean = mean(lst)
    lst_median = median(lst, is_even)
    return (
        min(lst),
        lst_mean,
        sample_standard_deviation(lst, lst_mean),
        lst_median,
        q1(lst, is_even, lst_median),
        q3(lst, is_even, lst_median),
        max(lst),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def calc_species_data(species):
    """Return the rows of the species count table.

    *species* maps a species code (a key of ``SPECIES_NAMES``) to the number
    of observations of that species. The result holds one
    ``(name, count, share of the total)`` tuple per species, in the order of
    the mapping, followed by a final summary row whose share is ``1.0``.
    """
    total = sum(species.values())
    species_list = [
        (SPECIES_NAMES[code], count, count / total)
        for code, count in species.items()
    ]
    species_list.append(("Razem", total, 1.0))
    return species_list
|
||||||
|
|
||||||
|
|
||||||
|
def percentage_format(x):
    """Return the fraction *x* as a percentage with one decimal place.

    The number is formatted through ``locale.format_string``, so the decimal
    separator follows the current locale; a ``%`` sign is appended.
    """
    percent = x * 100
    return locale.format_string('%.1f%%', percent)
|
||||||
|
|
||||||
|
|
||||||
|
def float_format(x):
    """Return *x* formatted with two decimal places.

    The number is formatted through ``locale.format_string``, so the decimal
    separator follows the current locale.
    """
    template = '%.2f'
    return locale.format_string(template, x)
|
||||||
|
|
||||||
|
|
||||||
|
def print_table(table):
    """Print *table* to standard output as a fixed-width text table.

    *table* is a sequence of rows. The first row is the header and all of
    its cells are centred. In every following row the first cell is
    left-aligned and the remaining cells are centred. A row that is ``None``
    is drawn as a line of dashes. Lines of ``=`` are printed above the
    header, below the header and at the end of the table.

    Each row is a sequence of strings; each column is as wide as its longest
    cell. An empty *table* prints nothing.
    """
    if not table:
        return

    header = table[0]

    # Width of every column. The built-in max() is deliberately not used:
    # this module defines its own max() that just returns the last element
    # of a list.
    cell_sizes = [0] * len(header)
    for row in table:
        if row is None:
            continue
        for i in range(len(header)):
            if len(row[i]) > cell_sizes[i]:
                cell_sizes[i] = len(row[i])

    header_row_format = ' | '.join(f'{{:^{size}}}' for size in cell_sizes)
    # Joining all the column specs (instead of gluing the first one to the
    # rest with a literal separator) avoids a dangling " | " when the table
    # has a single column.
    row_format = ' | '.join(
        [f'{{:<{cell_sizes[0]}}}']
        + [f'{{:^{size}}}' for size in cell_sizes[1:]])

    # The header is recognised by its position, so a data row that happens
    # to be equal to the header is still formatted as a data row.
    lines = [header_row_format.format(*header)]
    for row in table[1:]:
        lines.append(None if row is None else row_format.format(*row))

    # The horizontal lines are one character longer than the widest row.
    row_size = 0
    for line in lines:
        if line is not None and len(line) > row_size:
            row_size = len(line)
    row_size += 1

    rule = '=' * row_size
    separator = '-' * row_size
    output = [rule, lines[0], rule]
    output.extend(separator if line is None else line for line in lines[1:])
    output.append(rule)

    for line in output:
        print(line)
|
||||||
|
|
||||||
|
|
||||||
|
def print_data(species_data, trait_data):
    """Print the species count table and the trait statistics table.

    *species_data* is a list of ``(name, count, share)`` tuples; a separator
    row is inserted in front of every row equal to the last one, which sets
    the final row apart from the rest. *trait_data* is a list of 7-element
    sequences ``(minimum, mean, standard deviation, median, Q1, Q3,
    maximum)``, labelled by position with ``ATTRIBUTE_NAMES``. The two
    tables are separated by an empty line.
    """
    species_printable_table = [("Gatunek", "Liczebność (%)")]
    for row in species_data:
        if row == species_data[-1]:
            species_printable_table.append(None)
        name, count, share = row[0], row[1], row[2]
        species_printable_table.append(
            (name, f"{count} ({percentage_format(share)})"))

    trait_printable_table = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)",
         "Mediana (Q1 - Q3)", "Maksimum"),
    ]
    for i, stats in enumerate(trait_data):
        label = ATTRIBUTE_NAMES[i]
        minimum = float_format(stats[0])
        average = float_format(stats[1])
        deviation = float_format(stats[2])
        middle = float_format(stats[3])
        lower_quartile = float_format(stats[4])
        upper_quartile = float_format(stats[5])
        maximum = float_format(stats[6])
        trait_printable_table.append((
            label,
            minimum,
            f"{average} (±{deviation})",
            f"{middle} ({lower_quartile} - {upper_quartile})",
            maximum,
        ))

    print_table(species_printable_table)
    print()
    print_table(trait_printable_table)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Read ``data.csv``, compute the statistics and print both tables.

    Every non-blank line of the file must hold five comma-separated fields:
    four numeric measurements followed by a species code (``'0'``, ``'1'``
    or ``'2'``). A line with a different number of fields raises
    ``ValueError``; an unknown species code raises ``KeyError``. The file is
    looked up in the current working directory.
    """
    # read data
    with open("data.csv", "r") as f:
        data = f.read().splitlines()

    # parse data: one list per measurement column, in file order
    columns = [[], [], [], []]
    species_dict = {
        '0': 0,
        '1': 0,
        '2': 0,
    }
    for line in data:
        # A blank line (e.g. an extra newline at the end of the file) holds
        # no observation; skip it instead of failing on the unpacking below.
        if not line.strip():
            continue
        # Strip the fields so that spaces around the species code do not
        # turn it into an unknown dictionary key.
        sepal_length, sepal_width, petal_length, petal_width, species = [
            field.strip() for field in line.split(',')]
        measurements = (sepal_length, sepal_width, petal_length, petal_width)
        for column, raw_value in zip(columns, measurements):
            column.append(float(raw_value))
        species_dict[species] += 1

    # The statistics helpers expect sorted input.
    for column in columns:
        column.sort()

    # calculate results
    species_data = calc_species_data(species_dict)
    trait_data = [calc_data(column) for column in columns]

    # print results
    print_data(species_data, trait_data)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import errno
    import os.path

    # Check for the input file explicitly: an `assert` is removed when
    # Python runs with -O and reports a bare AssertionError otherwise.
    if not os.path.isfile('data.csv'):
        raise FileNotFoundError(
            errno.ENOENT, os.strerror(errno.ENOENT), 'data.csv')
    main()
|
Loading…
Reference in New Issue
Block a user