commit 40f642966c0f4bbcecb5b864da7767b9495ea70b Author: Kapitan Date: Sun Oct 17 21:28:23 2021 +0200 zad 1 diff --git a/zad1/README.txt b/zad1/README.txt new file mode 100644 index 0000000..31f465f --- /dev/null +++ b/zad1/README.txt @@ -0,0 +1,15 @@ +CHARAKTERYSTYKA GATUNKÓW IRYSÓW + +Liczba klas (gatunków): 3 + +Liczba obserwacji: 150 (50 dla każdego gatunku) + +Atrybuty (kolumny): +1. długość działki kielicha (ang. sepal length) [cm] +2. szerokość działki kielicha (ang. sepal width) [cm] +3. długość płatka (ang. petal length) [cm] +4. szerokość płatka (ang. petal width) [cm] +5. gatunek (ang. species): + 0 - setosa + 1 - versicolor + 2 - virginica diff --git a/zad1/data.csv b/zad1/data.csv new file mode 100644 index 0000000..07efc79 --- /dev/null +++ b/zad1/data.csv @@ -0,0 +1,150 @@ +5.1,3.5,1.4,0.2,0 +4.9,3.0,1.4,0.2,0 +4.7,3.2,1.3,0.2,0 +4.6,3.1,1.5,0.2,0 +5.0,3.6,1.4,0.2,0 +5.4,3.9,1.7,0.4,0 +4.6,3.4,1.4,0.3,0 +5.0,3.4,1.5,0.2,0 +4.4,2.9,1.4,0.2,0 +4.9,3.1,1.5,0.1,0 +5.4,3.7,1.5,0.2,0 +4.8,3.4,1.6,0.2,0 +4.8,3.0,1.4,0.1,0 +4.3,3.0,1.1,0.1,0 +5.8,4.0,1.2,0.2,0 +5.7,4.4,1.5,0.4,0 +5.4,3.9,1.3,0.4,0 +5.1,3.5,1.4,0.3,0 +5.7,3.8,1.7,0.3,0 +5.1,3.8,1.5,0.3,0 +5.4,3.4,1.7,0.2,0 +5.1,3.7,1.5,0.4,0 +4.6,3.6,1.0,0.2,0 +5.1,3.3,1.7,0.5,0 +4.8,3.4,1.9,0.2,0 +5.0,3.0,1.6,0.2,0 +5.0,3.4,1.6,0.4,0 +5.2,3.5,1.5,0.2,0 +5.2,3.4,1.4,0.2,0 +4.7,3.2,1.6,0.2,0 +4.8,3.1,1.6,0.2,0 +5.4,3.4,1.5,0.4,0 +5.2,4.1,1.5,0.1,0 +5.5,4.2,1.4,0.2,0 +4.9,3.1,1.5,0.2,0 +5.0,3.2,1.2,0.2,0 +5.5,3.5,1.3,0.2,0 +4.9,3.6,1.4,0.1,0 +4.4,3.0,1.3,0.2,0 +5.1,3.4,1.5,0.2,0 +5.0,3.5,1.3,0.3,0 +4.5,2.3,1.3,0.3,0 +4.4,3.2,1.3,0.2,0 +5.0,3.5,1.6,0.6,0 +5.1,3.8,1.9,0.4,0 +4.8,3.0,1.4,0.3,0 +5.1,3.8,1.6,0.2,0 +4.6,3.2,1.4,0.2,0 +5.3,3.7,1.5,0.2,0 +5.0,3.3,1.4,0.2,0 +7.0,3.2,4.7,1.4,1 +6.4,3.2,4.5,1.5,1 +6.9,3.1,4.9,1.5,1 +5.5,2.3,4.0,1.3,1 +6.5,2.8,4.6,1.5,1 +5.7,2.8,4.5,1.3,1 +6.3,3.3,4.7,1.6,1 +4.9,2.4,3.3,1.0,1 +6.6,2.9,4.6,1.3,1 +5.2,2.7,3.9,1.4,1 +5.0,2.0,3.5,1.0,1 +5.9,3.0,4.2,1.5,1 +6.0,2.2,4.0,1.0,1 
+6.1,2.9,4.7,1.4,1 +5.6,2.9,3.6,1.3,1 +6.7,3.1,4.4,1.4,1 +5.6,3.0,4.5,1.5,1 +5.8,2.7,4.1,1.0,1 +6.2,2.2,4.5,1.5,1 +5.6,2.5,3.9,1.1,1 +5.9,3.2,4.8,1.8,1 +6.1,2.8,4.0,1.3,1 +6.3,2.5,4.9,1.5,1 +6.1,2.8,4.7,1.2,1 +6.4,2.9,4.3,1.3,1 +6.6,3.0,4.4,1.4,1 +6.8,2.8,4.8,1.4,1 +6.7,3.0,5.0,1.7,1 +6.0,2.9,4.5,1.5,1 +5.7,2.6,3.5,1.0,1 +5.5,2.4,3.8,1.1,1 +5.5,2.4,3.7,1.0,1 +5.8,2.7,3.9,1.2,1 +6.0,2.7,5.1,1.6,1 +5.4,3.0,4.5,1.5,1 +6.0,3.4,4.5,1.6,1 +6.7,3.1,4.7,1.5,1 +6.3,2.3,4.4,1.3,1 +5.6,3.0,4.1,1.3,1 +5.5,2.5,4.0,1.3,1 +5.5,2.6,4.4,1.2,1 +6.1,3.0,4.6,1.4,1 +5.8,2.6,4.0,1.2,1 +5.0,2.3,3.3,1.0,1 +5.6,2.7,4.2,1.3,1 +5.7,3.0,4.2,1.2,1 +5.7,2.9,4.2,1.3,1 +6.2,2.9,4.3,1.3,1 +5.1,2.5,3.0,1.1,1 +5.7,2.8,4.1,1.3,1 +6.3,3.3,6.0,2.5,2 +5.8,2.7,5.1,1.9,2 +7.1,3.0,5.9,2.1,2 +6.3,2.9,5.6,1.8,2 +6.5,3.0,5.8,2.2,2 +7.6,3.0,6.6,2.1,2 +4.9,2.5,4.5,1.7,2 +7.3,2.9,6.3,1.8,2 +6.7,2.5,5.8,1.8,2 +7.2,3.6,6.1,2.5,2 +6.5,3.2,5.1,2.0,2 +6.4,2.7,5.3,1.9,2 +6.8,3.0,5.5,2.1,2 +5.7,2.5,5.0,2.0,2 +5.8,2.8,5.1,2.4,2 +6.4,3.2,5.3,2.3,2 +6.5,3.0,5.5,1.8,2 +7.7,3.8,6.7,2.2,2 +7.7,2.6,6.9,2.3,2 +6.0,2.2,5.0,1.5,2 +6.9,3.2,5.7,2.3,2 +5.6,2.8,4.9,2.0,2 +7.7,2.8,6.7,2.0,2 +6.3,2.7,4.9,1.8,2 +6.7,3.3,5.7,2.1,2 +7.2,3.2,6.0,1.8,2 +6.2,2.8,4.8,1.8,2 +6.1,3.0,4.9,1.8,2 +6.4,2.8,5.6,2.1,2 +7.2,3.0,5.8,1.6,2 +7.4,2.8,6.1,1.9,2 +7.9,3.8,6.4,2.0,2 +6.4,2.8,5.6,2.2,2 +6.3,2.8,5.1,1.5,2 +6.1,2.6,5.6,1.4,2 +7.7,3.0,6.1,2.3,2 +6.3,3.4,5.6,2.4,2 +6.4,3.1,5.5,1.8,2 +6.0,3.0,4.8,1.8,2 +6.9,3.1,5.4,2.1,2 +6.7,3.1,5.6,2.4,2 +6.9,3.1,5.1,2.3,2 +5.8,2.7,5.1,1.9,2 +6.8,3.2,5.9,2.3,2 +6.7,3.3,5.7,2.5,2 +6.7,3.0,5.2,2.3,2 +6.3,2.5,5.0,1.9,2 +6.5,3.0,5.2,2.0,2 +6.2,3.4,5.4,2.3,2 +5.9,3.0,5.1,1.8,2 diff --git a/zad1/main.py b/zad1/main.py new file mode 100644 index 0000000..520f3d9 --- /dev/null +++ b/zad1/main.py @@ -0,0 +1,207 @@ +""" +Komputerowa analiza danych +Zadanie 1 +Michał Leśniak 195642 +""" +import math +import locale +locale.setlocale(locale.LC_ALL, '') + +SPECIES_NAMES = { + '0': 'setosa', + '1': 'versicolor', + '2': 
# Species code (as stored in the last CSV column) -> species name.
SPECIES_NAMES = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'virginica'
}

# Row labels of the trait table, in the order of the CSV measurement columns.
ATTRIBUTE_NAMES = ['Długość działki kielicha [cm]',
                   'Szerokość działki kielicha [cm]',
                   'Długość płatka [cm]',
                   'Szerokość płatka [cm]']


def mean(lst):
    """Return the arithmetic mean of ``lst``.

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
    """
    # fsum avoids both the hand-written loop and accumulated rounding error.
    return math.fsum(lst) / len(lst)


def median(lst, is_even):
    """Return the median of an already sorted sequence.

    Args:
        lst: values in ascending order.
        is_even: True when ``len(lst)`` is even; the two central values are
            then averaged, otherwise the single central value is returned.
    """
    middle = len(lst) // 2
    if is_even:
        return (lst[middle - 1] + lst[middle]) / 2
    return lst[middle]


def sample_standard_deviation(lst, lst_mean):
    """Return the sample (n - 1) standard deviation of ``lst``.

    Args:
        lst: the observations.
        lst_mean: their arithmetic mean, computed by the caller.

    Raises:
        ZeroDivisionError: if ``lst`` holds exactly one element.
    """
    squared_deviations = math.fsum((value - lst_mean) ** 2 for value in lst)
    return math.sqrt(squared_deviations / (len(lst) - 1))


def q1(lst, is_even, lst_median):
    """Return the first quartile of a sorted sequence (median of lower half).

    For an odd-length sequence the middle element belongs to both halves;
    for an even-length sequence the sequence is split cleanly in two.

    Args:
        lst: values in ascending order; it is not modified.
        is_even: True when ``len(lst)`` is even.
        lst_median: accepted for backward compatibility; not needed because
            the half is built from the elements of ``lst`` only.
    """
    middle = len(lst) // 2
    lower_half = lst[:middle] if is_even else lst[:middle + 1]
    # The half has its own parity, independent of the parity of the full list.
    return median(lower_half, len(lower_half) % 2 == 0)


def q3(lst, is_even, lst_median):
    """Return the third quartile of a sorted sequence (median of upper half).

    Mirrors ``q1``: the middle element of an odd-length sequence is part of
    the upper half too, an even-length sequence is split cleanly in two.

    Args:
        lst: values in ascending order; it is not modified.
        is_even: True when ``len(lst)`` is even.
        lst_median: accepted for backward compatibility; unused.
    """
    upper_half = lst[len(lst) // 2:]
    # The half has its own parity, independent of the parity of the full list.
    return median(upper_half, len(upper_half) % 2 == 0)


def min(lst):
    """Return the smallest value of a sequence sorted in ascending order.

    NOTE: this intentionally keeps the original name, which shadows the
    builtin ``min`` for the rest of the module.
    """
    return lst[0]


def max(lst):
    """Return the largest value of a sequence sorted in ascending order.

    NOTE: this intentionally keeps the original name, which shadows the
    builtin ``max`` for the rest of the module.
    """
    return lst[-1]


def calc_data(lst):
    """Compute the summary statistics of one sorted list of measurements.

    Returns:
        A tuple ``(minimum, mean, sample standard deviation, median, Q1, Q3,
        maximum)``.
    """
    is_even = len(lst) % 2 == 0
    lst_mean = mean(lst)
    lst_median = median(lst, is_even)
    return (
        min(lst),
        lst_mean,
        sample_standard_deviation(lst, lst_mean),
        lst_median,
        q1(lst, is_even, lst_median),
        q3(lst, is_even, lst_median),
        max(lst),
    )


def calc_species_data(species):
    """Turn per-species counts into rows of ``(name, count, share)``.

    Args:
        species: mapping of species code (a key of ``SPECIES_NAMES``) to the
            number of observations.

    Returns:
        One row per species in mapping order, followed by a summary row
        labelled ``"Razem"`` holding the total count and a share of 1.0.

    Raises:
        KeyError: if a code is missing from ``SPECIES_NAMES``.
        ZeroDivisionError: if all counts are zero.
    """
    total = sum(species.values())
    rows = [(SPECIES_NAMES[code], count, count / total)
            for code, count in species.items()]
    rows.append(("Razem", total, 1.0))
    return rows


def percentage_format(x):
    """Format the fraction ``x`` as a locale-aware percentage, 1 decimal."""
    return locale.format_string('%.1f%%', x * 100)


def float_format(x):
    """Format ``x`` as a locale-aware number with two decimals."""
    return locale.format_string('%.2f', x)


def print_table(table):
    """Print ``table`` as a text table with a header and rule lines.

    Args:
        table: sequence of rows. The first row is the header (all cells
            centred). Every other row is either a tuple of strings (first
            cell left-aligned, remaining cells centred) or ``None``, which
            is rendered as a horizontal ``-`` separator.

    Nothing is printed for an empty table.
    """
    if not table:
        return

    # Width of each column = longest cell in that column.
    widths = [0] * len(table[0])
    for row in table:
        if not row:
            continue  # separator rows do not contribute to column widths
        for column in range(len(widths)):
            if len(row[column]) > widths[column]:
                widths[column] = len(row[column])

    header_format = ' | '.join(f'{{:^{width}}}' for width in widths)
    row_format = ' | '.join(
        [f'{{:<{widths[0]}}}'] + [f'{{:^{width}}}' for width in widths[1:]])

    rendered = []
    for index, row in enumerate(table):
        if index == 0:
            # The header is identified by position, so a data row that
            # happens to equal the header is still formatted as data.
            rendered.append(header_format.format(*row))
        elif row is None:
            rendered.append(None)
        else:
            rendered.append(row_format.format(*row))

    # Rules are one character wider than the widest rendered row. The
    # builtin max() is shadowed in this module, hence the explicit loop.
    row_size = 0
    for line in rendered:
        if line and len(line) > row_size:
            row_size = len(line)
    row_size += 1

    rule = '=' * row_size
    separator = '-' * row_size
    output = [rule, rendered[0], rule]
    output.extend(separator if line is None else line for line in rendered[1:])
    output.append(rule)
    for line in output:
        print(line)


def print_data(species_data, trait_data):
    """Print the species table and the trait statistics table.

    Args:
        species_data: rows of ``(name, count, share)`` as returned by
            ``calc_species_data``; the last row is the summary row and is
            preceded by a separator line.
        trait_data: one ``calc_data`` tuple per entry of ``ATTRIBUTE_NAMES``.
    """
    species_printable_table = [("Gatunek", "Liczebność (%)")]
    last_index = len(species_data) - 1
    for index, row in enumerate(species_data):
        if index == last_index:
            species_printable_table.append(None)
        species_printable_table.append(
            (row[0], "{} ({})".format(row[1], percentage_format(row[2]))))

    trait_printable_table = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)",
         "Mediana (Q1 - Q3)", "Maksimum")]
    for index, stats in enumerate(trait_data):
        (minimum, average, deviation,
         middle, lower_quartile, upper_quartile, maximum) = stats[:7]
        trait_printable_table.append((
            ATTRIBUTE_NAMES[index],
            float_format(minimum),
            "{} (±{})".format(float_format(average), float_format(deviation)),
            "{} ({} - {})".format(
                float_format(middle),
                float_format(lower_quartile),
                float_format(upper_quartile)),
            float_format(maximum)))

    print_table(species_printable_table)
    print()
    print_table(trait_printable_table)


def _parse_rows(lines):
    """Parse CSV lines into four measurement columns and species counts.

    Returns:
        ``(columns, species_counts)`` where ``columns`` is a list of four
        lists of floats (in CSV column order) and ``species_counts`` maps
        each key of ``SPECIES_NAMES`` to its number of observations.

    Raises:
        ValueError: if a line does not have exactly five fields or a
            measurement is not a number.
        KeyError: if the species code is not in ``SPECIES_NAMES``.
    """
    columns = [[], [], [], []]
    species_counts = dict.fromkeys(SPECIES_NAMES, 0)
    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines, e.g. an empty line at EOF
        *measurements, species = line.split(',')
        if len(measurements) != len(columns):
            raise ValueError(
                f"expected 5 comma-separated fields, got: {line!r}")
        for column, value in zip(columns, measurements):
            column.append(float(value))
        species_counts[species.strip()] += 1
    return columns, species_counts


def main():
    """Read ``data.csv`` from the working directory and print both tables."""
    # read and parse data
    with open("data.csv", "r", encoding="utf-8") as f:
        columns, species_counts = _parse_rows(f)

    # the statistics helpers expect ascending order
    for column in columns:
        column.sort()

    # calculate results
    species_data = calc_species_data(species_counts)
    trait_data = [calc_data(column) for column in columns]

    # print results
    print_data(species_data, trait_data)


if __name__ == "__main__":
    import os.path
    # An explicit check instead of ``assert``: asserts vanish under ``-O``.
    if not os.path.isfile('data.csv'):
        raise SystemExit("data.csv not found in the current directory")
    main()