""" Komputerowa analiza danych Zadanie 1 Michał Leśniak 195642 """ import locale locale.setlocale(locale.LC_ALL, '') SPECIES_NAMES = { '0': 'setosa', '1': 'versicolor', '2': 'virginica' } ATTRIBUTE_NAMES = ['Długość działki kielicha [cm]', 'Szerokość działki kielicha [cm]', 'Długość płatka [cm]', 'Szerokość płatka [cm]'] def mean(lst): sum = 0 for i in lst: sum += i return sum/len(lst) def median(lst): is_even = len(lst) % 2 == 0 return (lst[len(lst)//2-1]+lst[len(lst)//2])/2 if is_even else lst[len(lst)//2] def sample_standard_deviation(lst, lst_mean): sum = 0 for x in [(y-lst_mean)**2 for y in lst]: sum += x return (sum/(len(lst)-1))**0.5 def q1(lst, lst_median): is_even = len(lst) % 2 == 0 idx = len(lst)//2 sorted_list = lst[:idx] if not is_even: sorted_list.append(lst_median) return median(sorted_list) def q3(lst, lst_median): is_even = len(lst) % 2 == 0 idx = len(lst)//2 if is_even else len(lst)//2+1 sorted_list = lst[idx:] if not is_even: sorted_list.insert(0, lst_median) return median(sorted_list) def min(lst): return lst[0] def max(lst): return lst[-1] def calc_data(lst): lst_mean = mean(lst) lst_median = median(lst) return min(lst), lst_mean, sample_standard_deviation(lst, lst_mean), lst_median, \ q1(lst, lst_median), q3(lst, lst_median), max(lst) def calc_species_data(species): total = 0 for x in species.values(): total += x species_list = [] for key in species: species_list.append( (SPECIES_NAMES[key], species[key], species[key]/total)) species_list.append(("Razem", total, 1.0)) return species_list def percentage_format(x): return locale.format_string('%.1f%%', x*100) def float_format(x): return locale.format_string('%.2f', x) def print_table(table): if not table: return cell_sizes = [0] * len(table[0]) for i in range(len(table[0])): for x in table: if not x: continue if len(x[i]) > cell_sizes[i]: cell_sizes[i] = len(x[i]) header_row_format = ' | '.join((f'{{:^{x}}}' for x in cell_sizes)) row_format = f'{{:<{cell_sizes[0]}}} | ' + \ ' | '.join((f'{{:^{x}}}' for x in cell_sizes[1:])) lines = [] for x in table: if x == table[0]: lines.append(header_row_format.format(*x)) continue if x is None: lines.append(None) continue lines.append(row_format.format(*x)) row_size = 0 for x in lines: if x and len(x) > row_size: row_size = len(x) row_size += 1 lines.insert(1, '='*row_size) lines.insert(0, '='*row_size) lines.insert(len(lines), '='*row_size) for i in range(len(lines)): if lines[i] is None: lines[i] = '-'*row_size for x in lines: print(x) def print_data(species_data, trait_data): species_printable_table = [] species_printable_table.append(("Gatunek", "Liczebność (%)")) for x in species_data: if x == species_data[-1]: species_printable_table.append(None) species_printable_table.append( (x[0], "{} ({})".format(x[1], percentage_format(x[2])))) trait_printable_table = [] trait_printable_table.append( ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)", "Mediana (Q1 - Q3)", "Maksimum")) for i in range(len(trait_data)): trait_printable_table.append((ATTRIBUTE_NAMES[i], float_format(trait_data[i][0]), "{} (±{})".format(float_format( trait_data[i][1]), float_format(trait_data[i][2])), "{} ({} - {})".format( float_format(trait_data[i][3]), float_format(trait_data[i][4]), float_format(trait_data[i][5])), float_format(trait_data[i][6]))) print_table(species_printable_table) print() print_table(trait_printable_table) def main(): # read data with open("data.csv", "r") as f: data = f.read().splitlines() # parse data sepal_length_list = [] sepal_width_list = [] petal_length_list = [] petal_width_list = [] species_dict = { '0': 0, '1': 0, '2': 0, } for line in data: sepal_length, sepal_width, petal_length, petal_width, species = line.split( ',') sepal_length_list.append(float(sepal_length)) sepal_width_list.append(float(sepal_width)) petal_length_list.append(float(petal_length)) petal_width_list.append(float(petal_width)) species_dict[species] += 1 sepal_length_list.sort() sepal_width_list.sort() petal_length_list.sort() petal_width_list.sort() # calculate results species_data = calc_species_data(species_dict) trait_data = [] for lst in [sepal_length_list, sepal_width_list, petal_length_list, petal_width_list]: trait_data.append(calc_data(lst)) # print results print_data(species_data, trait_data) if __name__ == "__main__": import os.path assert os.path.isfile('data.csv') main()