zad 1
This commit is contained in:
		
							
								
								
									
										15
									
								
								zad1/README.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								zad1/README.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| CHARAKTERYSTYKA GATUNKÓW IRYSÓW | ||||
|  | ||||
| Liczba klas (gatunków): 3 | ||||
|  | ||||
| Liczba obserwacji: 150 (50 dla każdego gatunku) | ||||
|  | ||||
| Atrybuty (kolumny): | ||||
| 1. długość działki kielicha (ang. sepal length) [cm] | ||||
| 2. szerokość działki kielicha (ang. sepal width) [cm] | ||||
| 3. długość płatka (ang. petal length) [cm] | ||||
| 4. szerokość płatka (ang. petal width) [cm] | ||||
| 5. gatunek (ang. species): | ||||
|    0 - setosa | ||||
|    1 - versicolor | ||||
|    2 - virginica | ||||
							
								
								
									
										150
									
								
								zad1/data.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								zad1/data.csv
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | ||||
| 5.1,3.5,1.4,0.2,0 | ||||
| 4.9,3.0,1.4,0.2,0 | ||||
| 4.7,3.2,1.3,0.2,0 | ||||
| 4.6,3.1,1.5,0.2,0 | ||||
| 5.0,3.6,1.4,0.2,0 | ||||
| 5.4,3.9,1.7,0.4,0 | ||||
| 4.6,3.4,1.4,0.3,0 | ||||
| 5.0,3.4,1.5,0.2,0 | ||||
| 4.4,2.9,1.4,0.2,0 | ||||
| 4.9,3.1,1.5,0.1,0 | ||||
| 5.4,3.7,1.5,0.2,0 | ||||
| 4.8,3.4,1.6,0.2,0 | ||||
| 4.8,3.0,1.4,0.1,0 | ||||
| 4.3,3.0,1.1,0.1,0 | ||||
| 5.8,4.0,1.2,0.2,0 | ||||
| 5.7,4.4,1.5,0.4,0 | ||||
| 5.4,3.9,1.3,0.4,0 | ||||
| 5.1,3.5,1.4,0.3,0 | ||||
| 5.7,3.8,1.7,0.3,0 | ||||
| 5.1,3.8,1.5,0.3,0 | ||||
| 5.4,3.4,1.7,0.2,0 | ||||
| 5.1,3.7,1.5,0.4,0 | ||||
| 4.6,3.6,1.0,0.2,0 | ||||
| 5.1,3.3,1.7,0.5,0 | ||||
| 4.8,3.4,1.9,0.2,0 | ||||
| 5.0,3.0,1.6,0.2,0 | ||||
| 5.0,3.4,1.6,0.4,0 | ||||
| 5.2,3.5,1.5,0.2,0 | ||||
| 5.2,3.4,1.4,0.2,0 | ||||
| 4.7,3.2,1.6,0.2,0 | ||||
| 4.8,3.1,1.6,0.2,0 | ||||
| 5.4,3.4,1.5,0.4,0 | ||||
| 5.2,4.1,1.5,0.1,0 | ||||
| 5.5,4.2,1.4,0.2,0 | ||||
| 4.9,3.1,1.5,0.2,0 | ||||
| 5.0,3.2,1.2,0.2,0 | ||||
| 5.5,3.5,1.3,0.2,0 | ||||
| 4.9,3.6,1.4,0.1,0 | ||||
| 4.4,3.0,1.3,0.2,0 | ||||
| 5.1,3.4,1.5,0.2,0 | ||||
| 5.0,3.5,1.3,0.3,0 | ||||
| 4.5,2.3,1.3,0.3,0 | ||||
| 4.4,3.2,1.3,0.2,0 | ||||
| 5.0,3.5,1.6,0.6,0 | ||||
| 5.1,3.8,1.9,0.4,0 | ||||
| 4.8,3.0,1.4,0.3,0 | ||||
| 5.1,3.8,1.6,0.2,0 | ||||
| 4.6,3.2,1.4,0.2,0 | ||||
| 5.3,3.7,1.5,0.2,0 | ||||
| 5.0,3.3,1.4,0.2,0 | ||||
| 7.0,3.2,4.7,1.4,1 | ||||
| 6.4,3.2,4.5,1.5,1 | ||||
| 6.9,3.1,4.9,1.5,1 | ||||
| 5.5,2.3,4.0,1.3,1 | ||||
| 6.5,2.8,4.6,1.5,1 | ||||
| 5.7,2.8,4.5,1.3,1 | ||||
| 6.3,3.3,4.7,1.6,1 | ||||
| 4.9,2.4,3.3,1.0,1 | ||||
| 6.6,2.9,4.6,1.3,1 | ||||
| 5.2,2.7,3.9,1.4,1 | ||||
| 5.0,2.0,3.5,1.0,1 | ||||
| 5.9,3.0,4.2,1.5,1 | ||||
| 6.0,2.2,4.0,1.0,1 | ||||
| 6.1,2.9,4.7,1.4,1 | ||||
| 5.6,2.9,3.6,1.3,1 | ||||
| 6.7,3.1,4.4,1.4,1 | ||||
| 5.6,3.0,4.5,1.5,1 | ||||
| 5.8,2.7,4.1,1.0,1 | ||||
| 6.2,2.2,4.5,1.5,1 | ||||
| 5.6,2.5,3.9,1.1,1 | ||||
| 5.9,3.2,4.8,1.8,1 | ||||
| 6.1,2.8,4.0,1.3,1 | ||||
| 6.3,2.5,4.9,1.5,1 | ||||
| 6.1,2.8,4.7,1.2,1 | ||||
| 6.4,2.9,4.3,1.3,1 | ||||
| 6.6,3.0,4.4,1.4,1 | ||||
| 6.8,2.8,4.8,1.4,1 | ||||
| 6.7,3.0,5.0,1.7,1 | ||||
| 6.0,2.9,4.5,1.5,1 | ||||
| 5.7,2.6,3.5,1.0,1 | ||||
| 5.5,2.4,3.8,1.1,1 | ||||
| 5.5,2.4,3.7,1.0,1 | ||||
| 5.8,2.7,3.9,1.2,1 | ||||
| 6.0,2.7,5.1,1.6,1 | ||||
| 5.4,3.0,4.5,1.5,1 | ||||
| 6.0,3.4,4.5,1.6,1 | ||||
| 6.7,3.1,4.7,1.5,1 | ||||
| 6.3,2.3,4.4,1.3,1 | ||||
| 5.6,3.0,4.1,1.3,1 | ||||
| 5.5,2.5,4.0,1.3,1 | ||||
| 5.5,2.6,4.4,1.2,1 | ||||
| 6.1,3.0,4.6,1.4,1 | ||||
| 5.8,2.6,4.0,1.2,1 | ||||
| 5.0,2.3,3.3,1.0,1 | ||||
| 5.6,2.7,4.2,1.3,1 | ||||
| 5.7,3.0,4.2,1.2,1 | ||||
| 5.7,2.9,4.2,1.3,1 | ||||
| 6.2,2.9,4.3,1.3,1 | ||||
| 5.1,2.5,3.0,1.1,1 | ||||
| 5.7,2.8,4.1,1.3,1 | ||||
| 6.3,3.3,6.0,2.5,2 | ||||
| 5.8,2.7,5.1,1.9,2 | ||||
| 7.1,3.0,5.9,2.1,2 | ||||
| 6.3,2.9,5.6,1.8,2 | ||||
| 6.5,3.0,5.8,2.2,2 | ||||
| 7.6,3.0,6.6,2.1,2 | ||||
| 4.9,2.5,4.5,1.7,2 | ||||
| 7.3,2.9,6.3,1.8,2 | ||||
| 6.7,2.5,5.8,1.8,2 | ||||
| 7.2,3.6,6.1,2.5,2 | ||||
| 6.5,3.2,5.1,2.0,2 | ||||
| 6.4,2.7,5.3,1.9,2 | ||||
| 6.8,3.0,5.5,2.1,2 | ||||
| 5.7,2.5,5.0,2.0,2 | ||||
| 5.8,2.8,5.1,2.4,2 | ||||
| 6.4,3.2,5.3,2.3,2 | ||||
| 6.5,3.0,5.5,1.8,2 | ||||
| 7.7,3.8,6.7,2.2,2 | ||||
| 7.7,2.6,6.9,2.3,2 | ||||
| 6.0,2.2,5.0,1.5,2 | ||||
| 6.9,3.2,5.7,2.3,2 | ||||
| 5.6,2.8,4.9,2.0,2 | ||||
| 7.7,2.8,6.7,2.0,2 | ||||
| 6.3,2.7,4.9,1.8,2 | ||||
| 6.7,3.3,5.7,2.1,2 | ||||
| 7.2,3.2,6.0,1.8,2 | ||||
| 6.2,2.8,4.8,1.8,2 | ||||
| 6.1,3.0,4.9,1.8,2 | ||||
| 6.4,2.8,5.6,2.1,2 | ||||
| 7.2,3.0,5.8,1.6,2 | ||||
| 7.4,2.8,6.1,1.9,2 | ||||
| 7.9,3.8,6.4,2.0,2 | ||||
| 6.4,2.8,5.6,2.2,2 | ||||
| 6.3,2.8,5.1,1.5,2 | ||||
| 6.1,2.6,5.6,1.4,2 | ||||
| 7.7,3.0,6.1,2.3,2 | ||||
| 6.3,3.4,5.6,2.4,2 | ||||
| 6.4,3.1,5.5,1.8,2 | ||||
| 6.0,3.0,4.8,1.8,2 | ||||
| 6.9,3.1,5.4,2.1,2 | ||||
| 6.7,3.1,5.6,2.4,2 | ||||
| 6.9,3.1,5.1,2.3,2 | ||||
| 5.8,2.7,5.1,1.9,2 | ||||
| 6.8,3.2,5.9,2.3,2 | ||||
| 6.7,3.3,5.7,2.5,2 | ||||
| 6.7,3.0,5.2,2.3,2 | ||||
| 6.3,2.5,5.0,1.9,2 | ||||
| 6.5,3.0,5.2,2.0,2 | ||||
| 6.2,3.4,5.4,2.3,2 | ||||
| 5.9,3.0,5.1,1.8,2 | ||||
| 
 | 
							
								
								
									
										207
									
								
								zad1/main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								zad1/main.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,207 @@ | ||||
| """ | ||||
| Komputerowa analiza danych | ||||
| Zadanie 1 | ||||
| Michał Leśniak 195642 | ||||
| """ | ||||
| import math | ||||
| import locale | ||||
| locale.setlocale(locale.LC_ALL, '') | ||||
|  | ||||
# Species code (as stored in the last CSV field) -> display name.
SPECIES_NAMES = {'0': 'setosa', '1': 'versicolor', '2': 'virginica'}

# Row labels of the four measured attributes; print_data() indexes this list
# with the position of each entry in its trait_data argument.
ATTRIBUTE_NAMES = [
    'Długość działki kielicha [cm]',
    'Szerokość działki kielicha [cm]',
    'Długość płatka [cm]',
    'Szerokość płatka [cm]',
]
|  | ||||
|  | ||||
def mean(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    The total is computed with ``math.fsum`` so that rounding error does not
    accumulate across additions, and no local name shadows the built-in
    ``sum``.

    Args:
        lst: Non-empty sized collection of numbers.

    Returns:
        The mean as a float.

    Raises:
        ZeroDivisionError: If ``lst`` is empty.
    """
    return math.fsum(lst) / len(lst)
|  | ||||
|  | ||||
def median(lst, is_even):
    """Return the middle value of an already sorted list.

    Args:
        lst: Values in ascending order (the order is not checked here).
        is_even: Chooses the formula. When true, the two elements around the
            centre are averaged; otherwise the single element at index
            ``len(lst) // 2`` is returned. The flag is trusted as given and
            is not compared with the actual length of ``lst``.

    Returns:
        The selected middle value.
    """
    middle = len(lst) // 2
    if not is_even:
        return lst[middle]
    return (lst[middle - 1] + lst[middle]) / 2
|  | ||||
|  | ||||
def sample_standard_deviation(lst, lst_mean):
    """Return the sample standard deviation (n - 1 denominator) of ``lst``.

    The squared deviations are streamed into ``math.fsum``: no intermediate
    list is built, the built-in ``sum`` is not shadowed, and the total does
    not suffer from accumulated rounding error.

    Args:
        lst: Sized collection of numbers.
        lst_mean: Mean of ``lst``, supplied by the caller so it is not
            recomputed here.

    Returns:
        The square root of the sum of squared deviations divided by
        ``len(lst) - 1``.

    Raises:
        ZeroDivisionError: If ``lst`` holds exactly one element.
    """
    squared_deviations = ((value - lst_mean) ** 2 for value in lst)
    return math.sqrt(math.fsum(squared_deviations) / (len(lst) - 1))
|  | ||||
|  | ||||
def _median_of_sorted(values):
    """Return the median of a non-empty, ascending-sorted list."""
    middle = len(values) // 2
    if len(values) % 2 == 0:
        return (values[middle - 1] + values[middle]) / 2
    return values[middle]


def q1(lst, is_even, lst_median):
    """Return the first quartile of an ascending-sorted list.

    The quartile is the median of the lower part of ``lst``:

    * odd length: the elements up to and including the middle one;
    * even length: the elements before the two central ones, followed by
      ``lst_median``.

    The formula used for that median is chosen from the length of the lower
    part itself. Reusing the parity of the full list is wrong whenever the
    two differ (for example 150 values give a 75-element part, 7 values give
    a 4-element part).

    Args:
        lst: Values in ascending order; not modified.
        is_even: Whether ``len(lst)`` is even.
        lst_median: Median of ``lst``.

    Returns:
        The median of the lower part.
    """
    half = len(lst) // 2
    if is_even:
        lower = lst[:half - 1] + [lst_median]
    else:
        lower = lst[:half + 1]
    return _median_of_sorted(lower)


def q3(lst, is_even, lst_median):
    """Return the third quartile of an ascending-sorted list.

    The quartile is the median of the upper part of ``lst``:

    * odd length: the elements from the middle one onwards;
    * even length: ``lst_median`` followed by the elements after the two
      central ones.

    As in ``q1``, the median formula follows the length of the upper part,
    not the parity of the full list.

    Args:
        lst: Values in ascending order; not modified.
        is_even: Whether ``len(lst)`` is even.
        lst_median: Median of ``lst``.

    Returns:
        The median of the upper part.
    """
    half = len(lst) // 2
    if is_even:
        upper = [lst_median] + lst[half + 1:]
    else:
        upper = lst[half:]
    return _median_of_sorted(upper)
|  | ||||
|  | ||||
def min(lst):
    """Return the first element of ``lst``.

    This is the minimum only when ``lst`` is sorted in ascending order; no
    comparison is performed here. Note that this definition replaces the
    built-in ``min`` for the rest of the module.

    Raises:
        IndexError: If ``lst`` is empty.
    """
    first = lst[0]
    return first
|  | ||||
|  | ||||
def max(lst):
    """Return the last element of ``lst``.

    This is the maximum only when ``lst`` is sorted in ascending order; no
    comparison is performed here. Note that this definition replaces the
    built-in ``max`` for the rest of the module.

    Raises:
        IndexError: If ``lst`` is empty.
    """
    last_index = len(lst) - 1
    return lst[last_index]
|  | ||||
|  | ||||
def calc_data(lst):
    """Compute the summary statistics of one ascending-sorted column.

    Args:
        lst: Sorted list of numbers.

    Returns:
        A 7-tuple ``(minimum, mean, sample standard deviation, median, Q1,
        Q3, maximum)``.
    """
    is_even = not len(lst) % 2
    lst_mean = mean(lst)
    lst_median = median(lst, is_even)

    smallest = min(lst)
    deviation = sample_standard_deviation(lst, lst_mean)
    lower_quartile = q1(lst, is_even, lst_median)
    upper_quartile = q3(lst, is_even, lst_median)
    largest = max(lst)

    return (smallest, lst_mean, deviation, lst_median,
            lower_quartile, upper_quartile, largest)
|  | ||||
|  | ||||
def calc_species_data(species):
    """Build the rows of the species frequency table.

    Args:
        species: Mapping of species code (a key of ``SPECIES_NAMES``) to the
            number of observations counted for it.

    Returns:
        A list of ``(name, count, share)`` tuples, one per species in the
        mapping's iteration order, followed by a final ``("Razem", total,
        1.0)`` summary row. ``share`` is ``count / total``.
    """
    total = sum(species.values())
    species_list = [
        (SPECIES_NAMES[code], count, count / total)
        for code, count in species.items()
    ]
    species_list.append(("Razem", total, 1.0))
    return species_list
|  | ||||
|  | ||||
def percentage_format(x):
    """Format the fraction ``x`` as a percentage with one decimal place.

    The number is rendered through ``locale.format_string``, so the decimal
    separator follows the active locale; a literal ``%`` is appended.
    """
    percent = x * 100
    return locale.format_string('%.1f%%', percent)
|  | ||||
|  | ||||
def float_format(x):
    """Format ``x`` with two decimal places using the active locale."""
    pattern = '%.2f'
    return locale.format_string(pattern, x)
|  | ||||
|  | ||||
def print_table(table):
    """Print ``table`` as an aligned text table on standard output.

    Layout: a ``=`` rule, the header row (all cells centred), a ``=`` rule,
    the data rows (first cell left-aligned, remaining cells centred) and a
    closing ``=`` rule. A ``None`` entry among the data rows is printed as a
    ``-`` rule.

    Args:
        table: Sequence whose first item is the header row (a sequence of
            strings) and whose other items are rows with the same number of
            string cells, or ``None`` for a separator. An empty table prints
            nothing.
    """
    if not table:
        return

    # Width of a column = its longest cell. The comparison is spelled out
    # because this file rebinds max() to a "last element" helper.
    column_count = len(table[0])
    cell_sizes = [0] * column_count
    for row in table:
        if not row:
            continue
        for i in range(column_count):
            if len(row[i]) > cell_sizes[i]:
                cell_sizes[i] = len(row[i])

    centred = [f'{{:^{size}}}' for size in cell_sizes]
    header_row_format = ' | '.join(centred)
    # Joining all columns at once avoids a dangling ' | ' when the table has
    # a single column.
    row_format = ' | '.join([f'{{:<{cell_sizes[0]}}}'] + centred[1:])

    # The header is recognised by position, so a data row that happens to
    # equal the header is still formatted as a data row.
    lines = []
    for index, row in enumerate(table):
        if row is None:
            lines.append(None)
        elif index == 0:
            lines.append(header_row_format.format(*row))
        else:
            lines.append(row_format.format(*row))

    # Rules are one character wider than the longest rendered row.
    row_size = 0
    for line in lines:
        if line and len(line) > row_size:
            row_size = len(line)
    row_size += 1

    heavy_rule = '=' * row_size
    light_rule = '-' * row_size

    output = [heavy_rule, lines[0], heavy_rule]
    output.extend(light_rule if line is None else line for line in lines[1:])
    output.append(heavy_rule)
    for line in output:
        print(line)
|  | ||||
|  | ||||
def print_data(species_data, trait_data):
    """Render and print the species table and the trait statistics table.

    Args:
        species_data: List of ``(name, count, share)`` tuples; a separator
            line is placed before every entry equal to the last one.
        trait_data: Sequence of 7-item tuples ``(minimum, mean, standard
            deviation, median, Q1, Q3, maximum)``; the entry at position
            ``i`` is labelled with ``ATTRIBUTE_NAMES[i]``.
    """
    species_printable_table = [("Gatunek", "Liczebność (%)")]
    for entry in species_data:
        if entry == species_data[-1]:
            species_printable_table.append(None)
        name, count, share = entry[0], entry[1], entry[2]
        species_printable_table.append(
            (name, f"{count} ({percentage_format(share)})"))

    trait_printable_table = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)", "Mediana (Q1 - Q3)", "Maksimum"),
    ]
    for i, stats in enumerate(trait_data):
        label = ATTRIBUTE_NAMES[i]
        # Format the seven statistics once, in tuple order.
        text = [float_format(stats[k]) for k in range(7)]
        trait_printable_table.append((
            label,
            text[0],
            f"{text[1]} (±{text[2]})",
            f"{text[3]} ({text[4]} - {text[5]})",
            text[6],
        ))

    print_table(species_printable_table)
    print()
    print_table(trait_printable_table)
|  | ||||
|  | ||||
def main():
    """Load ``data.csv``, compute the statistics and print both tables.

    The file is opened relative to the current working directory. Every line
    must hold five comma-separated fields: four numeric measurements
    followed by a species code that is one of ``'0'``, ``'1'`` or ``'2'``.
    """
    # read data
    with open("data.csv", "r") as f:
        data = f.read().splitlines()

    # parse data: one list per measurement, in field order
    columns = [[], [], [], []]
    species_dict = {'0': 0, '1': 0, '2': 0}
    for line in data:
        sepal_length, sepal_width, petal_length, petal_width, species = \
            line.split(',')
        fields = (sepal_length, sepal_width, petal_length, petal_width)
        for column, text in zip(columns, fields):
            column.append(float(text))
        species_dict[species] += 1

    # the statistics helpers read positions of the lists, so sort them first
    for column in columns:
        column.sort()

    # calculate results
    species_data = calc_species_data(species_dict)
    trait_data = [calc_data(column) for column in columns]

    # print results
    print_data(species_data, trait_data)
|  | ||||
|  | ||||
if __name__ == "__main__":
    import os.path
    import sys

    # Checked explicitly rather than with `assert`: assertions are removed
    # when Python runs with -O, which would skip this validation entirely.
    if not os.path.isfile('data.csv'):
        sys.exit("data.csv not found in the current working directory")
    main()
		Reference in New Issue
	
	Block a user