KAD/zad1/main.py
2021-11-07 20:39:10 +01:00

211 lines
5.4 KiB
Python

"""
Komputerowa analiza danych
Zadanie 1
Michał Leśniak 195642
"""
import locale
# Switch to the user's default locale so that the locale.format_string() calls
# in this module format numbers with the local conventions (for example the
# decimal separator).
locale.setlocale(locale.LC_ALL, '')
# Maps the species code found in the last field of each data record to the
# species name shown in the printed table.
SPECIES_NAMES = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'virginica',
}

# Row labels (in Polish) for the four measured traits, in record order:
# sepal length, sepal width, petal length, petal width.
ATTRIBUTE_NAMES = [
    'Długość działki kielicha [cm]',
    'Szerokość działki kielicha [cm]',
    'Długość płatka [cm]',
    'Szerokość płatka [cm]',
]
def mean(lst):
    """Return the arithmetic mean of the numbers in *lst*.

    Raises ZeroDivisionError if *lst* is empty.
    """
    return sum(lst) / len(lst)
def median(lst):
    """Return the median of *lst*.

    The middle element(s) are picked by position and nothing is sorted here,
    so *lst* must already be in ascending order. For an even number of
    elements the result is the average of the two middle ones.
    """
    middle = len(lst) // 2
    if len(lst) % 2:
        return lst[middle]
    return (lst[middle - 1] + lst[middle]) / 2
def sample_standard_deviation(lst, lst_mean):
    """Return the sample standard deviation of *lst*.

    *lst_mean* is the arithmetic mean of *lst*, supplied by the caller so it
    is not computed twice. The sum of squared deviations is divided by
    ``len(lst) - 1``, so a one-element list raises ZeroDivisionError.
    """
    squared_deviations = sum((value - lst_mean) ** 2 for value in lst)
    return (squared_deviations / (len(lst) - 1)) ** 0.5
def q1(lst, lst_median):
    """Return the first quartile of the ascending-sorted list *lst*.

    The quartile is the median of the lower half of the data. For an odd
    number of elements the lower half runs up to and including the middle
    element; for an even number it is the first ``len(lst) // 2`` elements
    with *lst_median* (the median of the whole list) appended.
    """
    half = len(lst) // 2
    if len(lst) % 2 == 0:
        lower_half = lst[:half] + [lst_median]
    else:
        lower_half = lst[:half + 1]
    return median(lower_half)
#
def q3(lst, lst_median):
    """Return the third quartile of the ascending-sorted list *lst*.

    The quartile is the median of the upper half of the data, built as the
    mirror image of the lower half used by ``q1``. For an odd number of
    elements the upper half starts at the middle element; for an even number
    it is *lst_median* (the median of the whole list) followed by the last
    ``len(lst) // 2`` elements.
    """
    # The slice starts at len(lst) // 2 for both parities. Starting one
    # element later for even lengths would drop lst[len(lst) // 2] and make
    # Q3 asymmetric to Q1 (e.g. [1, 2, 3, 4] must give Q1 = 2 and Q3 = 3).
    upper_half = lst[len(lst) // 2:]
    if len(lst) % 2 == 0:
        upper_half.insert(0, lst_median)
    return median(upper_half)
def min(lst):
    """Return the first element of *lst*.

    No comparison is performed, so this is the minimum only when *lst* is
    sorted in ascending order. Within this module the name shadows the
    built-in ``min``. An empty list raises IndexError.
    """
    return lst[0]
def max(lst):
    """Return the last element of *lst*.

    No comparison is performed, so this is the maximum only when *lst* is
    sorted in ascending order. Within this module the name shadows the
    built-in ``max``. An empty list raises IndexError.
    """
    return lst[-1]
def calc_data(lst):
    """Compute the summary statistics of the ascending-sorted list *lst*.

    Returns a 7-tuple:
    (minimum, mean, sample standard deviation, median, Q1, Q3, maximum).
    """
    average = mean(lst)
    middle = median(lst)
    return (
        min(lst),
        average,
        sample_standard_deviation(lst, average),
        middle,
        q1(lst, middle),
        q3(lst, middle),
        max(lst),
    )
def calc_species_data(species):
    """Turn per-species counts into table rows.

    *species* maps a species code (a key of ``SPECIES_NAMES``) to the number
    of records of that species. Returns a list of
    ``(name, count, fraction of the total)`` tuples, one per species in the
    mapping's order, followed by a ``("Razem", total, 1.0)`` summary row.
    """
    total = sum(species.values())
    species_list = [
        (SPECIES_NAMES[code], count, count / total)
        for code, count in species.items()
    ]
    species_list.append(("Razem", total, 1.0))
    return species_list
def percentage_format(x):
    """Format the fraction *x* as a percentage with one decimal place.

    The number is rendered through ``locale.format_string``, so it follows
    the numeric conventions of the active locale.
    """
    percent = x * 100
    return locale.format_string('%.1f%%', percent)
def float_format(x):
    """Format the number *x* with two decimal places.

    The number is rendered through ``locale.format_string``, so it follows
    the numeric conventions of the active locale.
    """
    pattern = '%.2f'
    return locale.format_string(pattern, x)
def print_table(table):
    """Print *table* to standard output as a plain-text table.

    *table* is a sequence of rows. The first row is the header; every other
    row is either a sequence of strings with one cell per header column, or
    ``None``, which is printed as a horizontal ``-`` separator. Header cells
    are centred; in data rows the first cell is left-aligned and the
    remaining cells are centred. Rules made of ``=`` are printed above the
    header, below the header and after the last row. An empty table prints
    nothing.
    """
    if not table:
        return

    header = table[0]

    # Each column is as wide as its longest cell. The comparison is written
    # out by hand because this module rebinds the name ``max`` to its own
    # one-argument helper.
    cell_sizes = [0] * len(header)
    for row in table:
        if not row:
            continue  # separator rows have no cells to measure
        for i, size in enumerate(cell_sizes):
            if len(row[i]) > size:
                cell_sizes[i] = len(row[i])

    header_row_format = ' | '.join(f'{{:^{size}}}' for size in cell_sizes)
    row_format = f'{{:<{cell_sizes[0]}}} | ' + \
        ' | '.join(f'{{:^{size}}}' for size in cell_sizes[1:])

    lines = []
    for index, row in enumerate(table):
        if index == 0:
            # The header is recognised by position, so a data row that
            # happens to equal the header is still laid out as data.
            lines.append(header_row_format.format(*row))
        elif row is None:
            lines.append(None)
        else:
            lines.append(row_format.format(*row))

    row_size = 0
    for line in lines:
        if line and len(line) > row_size:
            row_size = len(line)
    row_size += 1  # rules extend one character past the widest row

    rule = '=' * row_size
    separator = '-' * row_size
    output = [rule, lines[0], rule]
    output.extend(separator if line is None else line for line in lines[1:])
    output.append(rule)
    for line in output:
        print(line)
def print_data(species_data, trait_data):
    """Print the species-count table and the trait-statistics table.

    *species_data* is a list of ``(name, count, fraction)`` rows; the last
    row is treated as the summary row and is set off by a separator line.
    *trait_data* holds one
    ``(minimum, mean, standard deviation, median, Q1, Q3, maximum)`` tuple
    per trait, in the order of ``ATTRIBUTE_NAMES``.
    """
    species_printable_table = [("Gatunek", "Liczebność (%)")]
    last_index = len(species_data) - 1
    for index, (name, count, share) in enumerate(species_data):
        if index == last_index:
            # ``None`` is rendered by print_table as a separator line.
            species_printable_table.append(None)
        species_printable_table.append(
            (name, "{} ({})".format(count, percentage_format(share))))

    trait_printable_table = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)",
         "Mediana (Q1 - Q3)", "Maksimum")]
    for index, stats in enumerate(trait_data):
        minimum, average, std_dev, middle, first_q, third_q, maximum = stats
        trait_printable_table.append((
            ATTRIBUTE_NAMES[index],
            float_format(minimum),
            # Cell layout follows the column header: "mean (± std. dev.)".
            "{} (± {})".format(float_format(average), float_format(std_dev)),
            "{} ({} - {})".format(
                float_format(middle),
                float_format(first_q),
                float_format(third_q)),
            float_format(maximum),
        ))

    print_table(species_printable_table)
    print()
    print_table(trait_printable_table)
def main():
    """Read ``data.csv``, compute the summary statistics and print them.

    Every non-blank line of the file must consist of five comma-separated
    fields: the four measurements (sepal length, sepal width, petal length,
    petal width) followed by a species code, one of '0', '1' or '2'.
    """
    # read data
    with open("data.csv", "r") as f:
        data = f.read().splitlines()

    # parse data: one list of values per trait, in field order
    trait_lists = [[], [], [], []]
    species_dict = {
        '0': 0,
        '1': 0,
        '2': 0,
    }
    for line in data:
        if not line.strip():
            # A blank line (e.g. an extra newline at the end of the file)
            # carries no record and would otherwise break the unpacking.
            continue
        sepal_length, sepal_width, petal_length, petal_width, species = \
            line.split(',')
        measurements = (sepal_length, sepal_width, petal_length, petal_width)
        for values, raw in zip(trait_lists, measurements):
            values.append(float(raw))
        species_dict[species] += 1

    # The statistics helpers pick elements by position, so each list has to
    # be sorted before they are called.
    for values in trait_lists:
        values.sort()

    # calculate results
    species_data = calc_species_data(species_dict)
    trait_data = [calc_data(values) for values in trait_lists]

    # print results
    print_data(species_data, trait_data)
if __name__ == "__main__":
    import os.path

    # Checked explicitly rather than with ``assert``, because assertions are
    # skipped when Python runs with -O.
    if not os.path.isfile('data.csv'):
        raise SystemExit("data.csv: file not found in the current directory")
    main()