zad 1
This commit is contained in:
		
							
								
								
									
										15
									
								
								zad1/README.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								zad1/README.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | |||||||
|  | CHARAKTERYSTYKA GATUNKÓW IRYSÓW | ||||||
|  |  | ||||||
|  | Liczba klas (gatunków): 3 | ||||||
|  |  | ||||||
|  | Liczba obserwacji: 150 (50 dla każdego gatunku) | ||||||
|  |  | ||||||
|  | Atrybuty (kolumny): | ||||||
|  | 1. długość działki kielicha (ang. sepal length) [cm] | ||||||
|  | 2. szerokość działki kielicha (ang. sepal width) [cm] | ||||||
|  | 3. długość płatka (ang. petal length) [cm] | ||||||
|  | 4. szerokość płatka (ang. petal width) [cm] | ||||||
|  | 5. gatunek (ang. species): | ||||||
|  |    0 - setosa | ||||||
|  |    1 - versicolor | ||||||
|  |    2 - virginica | ||||||
							
								
								
									
										150
									
								
								zad1/data.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										150
									
								
								zad1/data.csv
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,150 @@ | |||||||
|  | 5.1,3.5,1.4,0.2,0 | ||||||
|  | 4.9,3.0,1.4,0.2,0 | ||||||
|  | 4.7,3.2,1.3,0.2,0 | ||||||
|  | 4.6,3.1,1.5,0.2,0 | ||||||
|  | 5.0,3.6,1.4,0.2,0 | ||||||
|  | 5.4,3.9,1.7,0.4,0 | ||||||
|  | 4.6,3.4,1.4,0.3,0 | ||||||
|  | 5.0,3.4,1.5,0.2,0 | ||||||
|  | 4.4,2.9,1.4,0.2,0 | ||||||
|  | 4.9,3.1,1.5,0.1,0 | ||||||
|  | 5.4,3.7,1.5,0.2,0 | ||||||
|  | 4.8,3.4,1.6,0.2,0 | ||||||
|  | 4.8,3.0,1.4,0.1,0 | ||||||
|  | 4.3,3.0,1.1,0.1,0 | ||||||
|  | 5.8,4.0,1.2,0.2,0 | ||||||
|  | 5.7,4.4,1.5,0.4,0 | ||||||
|  | 5.4,3.9,1.3,0.4,0 | ||||||
|  | 5.1,3.5,1.4,0.3,0 | ||||||
|  | 5.7,3.8,1.7,0.3,0 | ||||||
|  | 5.1,3.8,1.5,0.3,0 | ||||||
|  | 5.4,3.4,1.7,0.2,0 | ||||||
|  | 5.1,3.7,1.5,0.4,0 | ||||||
|  | 4.6,3.6,1.0,0.2,0 | ||||||
|  | 5.1,3.3,1.7,0.5,0 | ||||||
|  | 4.8,3.4,1.9,0.2,0 | ||||||
|  | 5.0,3.0,1.6,0.2,0 | ||||||
|  | 5.0,3.4,1.6,0.4,0 | ||||||
|  | 5.2,3.5,1.5,0.2,0 | ||||||
|  | 5.2,3.4,1.4,0.2,0 | ||||||
|  | 4.7,3.2,1.6,0.2,0 | ||||||
|  | 4.8,3.1,1.6,0.2,0 | ||||||
|  | 5.4,3.4,1.5,0.4,0 | ||||||
|  | 5.2,4.1,1.5,0.1,0 | ||||||
|  | 5.5,4.2,1.4,0.2,0 | ||||||
|  | 4.9,3.1,1.5,0.2,0 | ||||||
|  | 5.0,3.2,1.2,0.2,0 | ||||||
|  | 5.5,3.5,1.3,0.2,0 | ||||||
|  | 4.9,3.6,1.4,0.1,0 | ||||||
|  | 4.4,3.0,1.3,0.2,0 | ||||||
|  | 5.1,3.4,1.5,0.2,0 | ||||||
|  | 5.0,3.5,1.3,0.3,0 | ||||||
|  | 4.5,2.3,1.3,0.3,0 | ||||||
|  | 4.4,3.2,1.3,0.2,0 | ||||||
|  | 5.0,3.5,1.6,0.6,0 | ||||||
|  | 5.1,3.8,1.9,0.4,0 | ||||||
|  | 4.8,3.0,1.4,0.3,0 | ||||||
|  | 5.1,3.8,1.6,0.2,0 | ||||||
|  | 4.6,3.2,1.4,0.2,0 | ||||||
|  | 5.3,3.7,1.5,0.2,0 | ||||||
|  | 5.0,3.3,1.4,0.2,0 | ||||||
|  | 7.0,3.2,4.7,1.4,1 | ||||||
|  | 6.4,3.2,4.5,1.5,1 | ||||||
|  | 6.9,3.1,4.9,1.5,1 | ||||||
|  | 5.5,2.3,4.0,1.3,1 | ||||||
|  | 6.5,2.8,4.6,1.5,1 | ||||||
|  | 5.7,2.8,4.5,1.3,1 | ||||||
|  | 6.3,3.3,4.7,1.6,1 | ||||||
|  | 4.9,2.4,3.3,1.0,1 | ||||||
|  | 6.6,2.9,4.6,1.3,1 | ||||||
|  | 5.2,2.7,3.9,1.4,1 | ||||||
|  | 5.0,2.0,3.5,1.0,1 | ||||||
|  | 5.9,3.0,4.2,1.5,1 | ||||||
|  | 6.0,2.2,4.0,1.0,1 | ||||||
|  | 6.1,2.9,4.7,1.4,1 | ||||||
|  | 5.6,2.9,3.6,1.3,1 | ||||||
|  | 6.7,3.1,4.4,1.4,1 | ||||||
|  | 5.6,3.0,4.5,1.5,1 | ||||||
|  | 5.8,2.7,4.1,1.0,1 | ||||||
|  | 6.2,2.2,4.5,1.5,1 | ||||||
|  | 5.6,2.5,3.9,1.1,1 | ||||||
|  | 5.9,3.2,4.8,1.8,1 | ||||||
|  | 6.1,2.8,4.0,1.3,1 | ||||||
|  | 6.3,2.5,4.9,1.5,1 | ||||||
|  | 6.1,2.8,4.7,1.2,1 | ||||||
|  | 6.4,2.9,4.3,1.3,1 | ||||||
|  | 6.6,3.0,4.4,1.4,1 | ||||||
|  | 6.8,2.8,4.8,1.4,1 | ||||||
|  | 6.7,3.0,5.0,1.7,1 | ||||||
|  | 6.0,2.9,4.5,1.5,1 | ||||||
|  | 5.7,2.6,3.5,1.0,1 | ||||||
|  | 5.5,2.4,3.8,1.1,1 | ||||||
|  | 5.5,2.4,3.7,1.0,1 | ||||||
|  | 5.8,2.7,3.9,1.2,1 | ||||||
|  | 6.0,2.7,5.1,1.6,1 | ||||||
|  | 5.4,3.0,4.5,1.5,1 | ||||||
|  | 6.0,3.4,4.5,1.6,1 | ||||||
|  | 6.7,3.1,4.7,1.5,1 | ||||||
|  | 6.3,2.3,4.4,1.3,1 | ||||||
|  | 5.6,3.0,4.1,1.3,1 | ||||||
|  | 5.5,2.5,4.0,1.3,1 | ||||||
|  | 5.5,2.6,4.4,1.2,1 | ||||||
|  | 6.1,3.0,4.6,1.4,1 | ||||||
|  | 5.8,2.6,4.0,1.2,1 | ||||||
|  | 5.0,2.3,3.3,1.0,1 | ||||||
|  | 5.6,2.7,4.2,1.3,1 | ||||||
|  | 5.7,3.0,4.2,1.2,1 | ||||||
|  | 5.7,2.9,4.2,1.3,1 | ||||||
|  | 6.2,2.9,4.3,1.3,1 | ||||||
|  | 5.1,2.5,3.0,1.1,1 | ||||||
|  | 5.7,2.8,4.1,1.3,1 | ||||||
|  | 6.3,3.3,6.0,2.5,2 | ||||||
|  | 5.8,2.7,5.1,1.9,2 | ||||||
|  | 7.1,3.0,5.9,2.1,2 | ||||||
|  | 6.3,2.9,5.6,1.8,2 | ||||||
|  | 6.5,3.0,5.8,2.2,2 | ||||||
|  | 7.6,3.0,6.6,2.1,2 | ||||||
|  | 4.9,2.5,4.5,1.7,2 | ||||||
|  | 7.3,2.9,6.3,1.8,2 | ||||||
|  | 6.7,2.5,5.8,1.8,2 | ||||||
|  | 7.2,3.6,6.1,2.5,2 | ||||||
|  | 6.5,3.2,5.1,2.0,2 | ||||||
|  | 6.4,2.7,5.3,1.9,2 | ||||||
|  | 6.8,3.0,5.5,2.1,2 | ||||||
|  | 5.7,2.5,5.0,2.0,2 | ||||||
|  | 5.8,2.8,5.1,2.4,2 | ||||||
|  | 6.4,3.2,5.3,2.3,2 | ||||||
|  | 6.5,3.0,5.5,1.8,2 | ||||||
|  | 7.7,3.8,6.7,2.2,2 | ||||||
|  | 7.7,2.6,6.9,2.3,2 | ||||||
|  | 6.0,2.2,5.0,1.5,2 | ||||||
|  | 6.9,3.2,5.7,2.3,2 | ||||||
|  | 5.6,2.8,4.9,2.0,2 | ||||||
|  | 7.7,2.8,6.7,2.0,2 | ||||||
|  | 6.3,2.7,4.9,1.8,2 | ||||||
|  | 6.7,3.3,5.7,2.1,2 | ||||||
|  | 7.2,3.2,6.0,1.8,2 | ||||||
|  | 6.2,2.8,4.8,1.8,2 | ||||||
|  | 6.1,3.0,4.9,1.8,2 | ||||||
|  | 6.4,2.8,5.6,2.1,2 | ||||||
|  | 7.2,3.0,5.8,1.6,2 | ||||||
|  | 7.4,2.8,6.1,1.9,2 | ||||||
|  | 7.9,3.8,6.4,2.0,2 | ||||||
|  | 6.4,2.8,5.6,2.2,2 | ||||||
|  | 6.3,2.8,5.1,1.5,2 | ||||||
|  | 6.1,2.6,5.6,1.4,2 | ||||||
|  | 7.7,3.0,6.1,2.3,2 | ||||||
|  | 6.3,3.4,5.6,2.4,2 | ||||||
|  | 6.4,3.1,5.5,1.8,2 | ||||||
|  | 6.0,3.0,4.8,1.8,2 | ||||||
|  | 6.9,3.1,5.4,2.1,2 | ||||||
|  | 6.7,3.1,5.6,2.4,2 | ||||||
|  | 6.9,3.1,5.1,2.3,2 | ||||||
|  | 5.8,2.7,5.1,1.9,2 | ||||||
|  | 6.8,3.2,5.9,2.3,2 | ||||||
|  | 6.7,3.3,5.7,2.5,2 | ||||||
|  | 6.7,3.0,5.2,2.3,2 | ||||||
|  | 6.3,2.5,5.0,1.9,2 | ||||||
|  | 6.5,3.0,5.2,2.0,2 | ||||||
|  | 6.2,3.4,5.4,2.3,2 | ||||||
|  | 5.9,3.0,5.1,1.8,2 | ||||||
| 
 | 
							
								
								
									
										207
									
								
								zad1/main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								zad1/main.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,207 @@ | |||||||
|  | """ | ||||||
|  | Komputerowa analiza danych | ||||||
|  | Zadanie 1 | ||||||
|  | Michał Leśniak 195642 | ||||||
|  | """ | ||||||
|  | import math | ||||||
|  | import locale | ||||||
|  | locale.setlocale(locale.LC_ALL, '') | ||||||
|  |  | ||||||
# Display name for each species code; the code is the last field of a CSV row.
SPECIES_NAMES = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'virginica',
}

# Row labels of the trait table, in the same order as the CSV columns.
ATTRIBUTE_NAMES = [
    'Długość działki kielicha [cm]',
    'Szerokość działki kielicha [cm]',
    'Długość płatka [cm]',
    'Szerokość płatka [cm]',
]
|  |  | ||||||
|  |  | ||||||
def mean(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    The total is computed with ``math.fsum`` so that rounding errors do not
    accumulate while adding many floating-point values.

    Args:
        lst: a non-empty sequence of numbers.

    Returns:
        The mean as a float.

    Raises:
        ValueError: if ``lst`` is empty.
    """
    if not lst:
        raise ValueError("mean() requires at least one value")
    return math.fsum(lst) / len(lst)
|  |  | ||||||
|  |  | ||||||
def median(lst, is_even):
    """Return the median of ``lst``, which must already be sorted.

    Args:
        lst: sorted sequence of numbers.
        is_even: when true, the two elements around the centre are averaged;
            otherwise the element at index ``len(lst) // 2`` is returned.
            The flag is trusted as given and is not checked against the
            actual length of ``lst``.
    """
    middle = len(lst) // 2
    if not is_even:
        return lst[middle]
    return (lst[middle - 1] + lst[middle]) / 2
|  |  | ||||||
|  |  | ||||||
def sample_standard_deviation(lst, lst_mean):
    """Return the sample standard deviation of ``lst``.

    The squared deviations from ``lst_mean`` are summed and divided by
    ``len(lst) - 1`` before taking the square root.

    Args:
        lst: sequence holding at least two numbers.
        lst_mean: the arithmetic mean of ``lst``, supplied by the caller.

    Returns:
        The standard deviation as a float.

    Raises:
        ValueError: if ``lst`` has fewer than two values; the ``n - 1``
            divisor would be zero or negative in that case.
    """
    if len(lst) < 2:
        raise ValueError(
            "sample_standard_deviation() requires at least two values")
    squared_deviations = ((value - lst_mean) ** 2 for value in lst)
    return math.sqrt(math.fsum(squared_deviations) / (len(lst) - 1))
|  |  | ||||||
|  |  | ||||||
def q1(lst, is_even, lst_median):
    """Return the first quartile of the sorted list ``lst``.

    The quartile is the median of the lower half of the sample.  For an odd
    number of observations the lower half is every value up to and including
    the middle one.  For an even number it is the values before the two
    middle ones, followed by the overall median ``lst_median``.

    Args:
        lst: list of numbers sorted in ascending order; it is not modified.
        is_even: whether ``lst`` has an even number of elements.
        lst_median: the median of ``lst``.
    """
    half = len(lst) // 2
    if is_even:
        lower = lst[:half - 1] + [lst_median]
    else:
        lower = lst[:half + 1]

    # The parity of the half is independent of the parity of the whole
    # sample (150 values give a half of 75), so it is derived from the half
    # itself instead of reusing ``is_even``.
    middle = len(lower) // 2
    if len(lower) % 2 == 0:
        return (lower[middle - 1] + lower[middle]) / 2
    return lower[middle]
|  |  | ||||||
|  |  | ||||||
def q3(lst, is_even, lst_median):
    """Return the third quartile of the sorted list ``lst``.

    The quartile is the median of the upper half of the sample.  For an odd
    number of observations the upper half is every value from the middle one
    onwards.  For an even number it is the overall median ``lst_median``
    followed by the values after the two middle ones.

    Args:
        lst: list of numbers sorted in ascending order; it is not modified.
        is_even: whether ``lst`` has an even number of elements.
        lst_median: the median of ``lst``.
    """
    half = len(lst) // 2
    if is_even:
        upper = [lst_median] + lst[half + 1:]
    else:
        upper = lst[half:]

    # The parity of the half is independent of the parity of the whole
    # sample (150 values give a half of 75), so it is derived from the half
    # itself instead of reusing ``is_even``.
    middle = len(upper) // 2
    if len(upper) % 2 == 0:
        return (upper[middle - 1] + upper[middle]) / 2
    return upper[middle]
|  |  | ||||||
|  |  | ||||||
def min(lst):
    """Return the first element of ``lst``.

    This is the smallest value only when ``lst`` is sorted in ascending
    order; no comparison is performed.  Note that this definition replaces
    the builtin ``min`` for the rest of the module.
    """
    first = lst[0]
    return first
|  |  | ||||||
|  |  | ||||||
def max(lst):
    """Return the last element of ``lst``.

    This is the largest value only when ``lst`` is sorted in ascending
    order; no comparison is performed.  Note that this definition replaces
    the builtin ``max`` for the rest of the module.
    """
    last = lst[-1]
    return last
|  |  | ||||||
|  |  | ||||||
def calc_data(lst):
    """Compute the summary statistics of one sorted list of measurements.

    Returns:
        A 7-tuple ``(minimum, mean, sample standard deviation, median,
        first quartile, third quartile, maximum)``.
    """
    even_count = len(lst) % 2 == 0
    average = mean(lst)
    middle = median(lst, even_count)
    summary = (
        min(lst),
        average,
        sample_standard_deviation(lst, average),
        middle,
        q1(lst, even_count, middle),
        q3(lst, even_count, middle),
        max(lst),
    )
    return summary
|  |  | ||||||
|  |  | ||||||
def calc_species_data(species):
    """Turn per-species counts into rows for the species table.

    Args:
        species: dict mapping a species code (a key of ``SPECIES_NAMES``)
            to the number of observations of that species.

    Returns:
        A list of ``(name, count, share)`` tuples, one per species in dict
        order, followed by a final ``("Razem", total, 1.0)`` summary row.
        ``share`` is the count divided by the total number of observations.
    """
    total = sum(species.values())
    rows = [
        (SPECIES_NAMES[code], count, count / total)
        for code, count in species.items()
    ]
    rows.append(("Razem", total, 1.0))
    return rows
|  |  | ||||||
|  |  | ||||||
def percentage_format(x):
    """Format the fraction ``x`` as a percentage with one decimal place.

    The number is rendered through ``locale.format_string``, so the decimal
    separator follows the currently active locale.
    """
    percent = x * 100
    return locale.format_string('%.1f%%', percent)
|  |  | ||||||
|  |  | ||||||
def float_format(x):
    """Format the number ``x`` with two decimal places.

    The number is rendered through ``locale.format_string``, so the decimal
    separator follows the currently active locale.
    """
    template = '%.2f'
    return locale.format_string(template, x)
|  |  | ||||||
|  |  | ||||||
def print_table(table):
    """Print ``table`` to standard output as a fixed-width text table.

    Args:
        table: sequence of rows.  The first row is the header.  Every other
            entry is either a sequence of strings (a data row) or ``None``,
            which is rendered as a horizontal line of ``-`` characters.

    Header cells are centred.  In data rows the first cell is left-aligned
    and the remaining cells are centred.  The header is framed by lines of
    ``=`` characters and another such line closes the table.  Nothing is
    printed when ``table`` is empty.
    """
    if not table:
        return

    header = table[0]
    column_count = len(header)

    # Width of a column = length of its longest cell.  Explicit comparisons
    # are used on purpose: this module defines its own max(), which only
    # returns the last element of a list.
    widths = [0] * column_count
    for row in table:
        if not row:
            continue
        for col in range(column_count):
            if len(row[col]) > widths[col]:
                widths[col] = len(row[col])

    centred = [f'{{:^{width}}}' for width in widths]
    header_format = ' | '.join(centred)
    row_format = f'{{:<{widths[0]}}} | ' + ' | '.join(centred[1:])

    lines = []
    for index, row in enumerate(table):
        # The header is recognised by position, not by value, so a data row
        # that repeats the header cells is still formatted as a data row.
        if index == 0:
            lines.append(header_format.format(*row))
        elif row is None:
            lines.append(None)
        else:
            lines.append(row_format.format(*row))

    longest = 0
    for line in lines:
        if line and len(line) > longest:
            longest = len(line)
    # Horizontal rules are one character wider than the widest text line.
    rule_width = longest + 1
    double_rule = '=' * rule_width
    single_rule = '-' * rule_width

    output = [double_rule, lines[0], double_rule]
    output.extend(single_rule if line is None else line for line in lines[1:])
    output.append(double_rule)
    for line in output:
        print(line)
|  |  | ||||||
|  |  | ||||||
def print_data(species_data, trait_data):
    """Print the species table and the trait table, separated by a blank line.

    Args:
        species_data: list of ``(name, count, share)`` tuples; a separator
            line is placed before any row equal to the last one.
        trait_data: list of 7-element sequences ``(minimum, mean, standard
            deviation, median, Q1, Q3, maximum)``, one per entry of
            ``ATTRIBUTE_NAMES`` and in the same order.
    """
    species_rows = [("Gatunek", "Liczebność (%)")]
    for entry in species_data:
        if entry == species_data[-1]:
            # None is drawn by print_table() as a horizontal separator.
            species_rows.append(None)
        count_cell = "{} ({})".format(entry[1], percentage_format(entry[2]))
        species_rows.append((entry[0], count_cell))

    trait_rows = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)", "Mediana (Q1 - Q3)", "Maksimum"),
    ]
    for index, stats in enumerate(trait_data):
        label = ATTRIBUTE_NAMES[index]
        minimum_cell = float_format(stats[0])
        mean_cell = "{} (±{})".format(
            float_format(stats[1]), float_format(stats[2]))
        median_cell = "{} ({} - {})".format(
            float_format(stats[3]),
            float_format(stats[4]),
            float_format(stats[5]))
        maximum_cell = float_format(stats[6])
        trait_rows.append(
            (label, minimum_cell, mean_cell, median_cell, maximum_cell))

    print_table(species_rows)
    print()
    print_table(trait_rows)
|  |  | ||||||
def main():
    """Read ``data.csv``, compute the summary statistics and print them.

    Every non-blank line of ``data.csv`` (looked up in the current working
    directory) must hold five comma-separated fields: sepal length, sepal
    width, petal length, petal width and the species code ``0``, ``1`` or
    ``2``.

    Raises:
        ValueError: if a line does not have exactly five fields, a
            measurement is not a number, or the file has no observations.
        KeyError: if a species code is not ``0``, ``1`` or ``2``.
    """
    # read data; the encoding is explicit so parsing does not depend on the
    # locale configured for the process
    with open("data.csv", "r", encoding="utf-8") as f:
        data = f.read().splitlines()

    # parse data
    sepal_length_list = []
    sepal_width_list = []
    petal_length_list = []
    petal_width_list = []
    species_dict = {
        '0': 0,
        '1': 0,
        '2': 0,
    }
    for line in data:
        if not line.strip():
            # A blank line (typically at the end of the file) carries no
            # observation and would break the five-field unpacking below.
            continue
        sepal_length, sepal_width, petal_length, petal_width, species = line.split(
            ',')
        sepal_length_list.append(float(sepal_length))
        sepal_width_list.append(float(sepal_width))
        petal_length_list.append(float(petal_length))
        petal_width_list.append(float(petal_width))
        species_dict[species.strip()] += 1

    if not sepal_length_list:
        raise ValueError("data.csv contains no observations")

    # The statistics helpers expect their input in ascending order.
    sepal_length_list.sort()
    sepal_width_list.sort()
    petal_length_list.sort()
    petal_width_list.sort()

    # calculate results
    species_data = calc_species_data(species_dict)
    trait_data = [
        calc_data(lst)
        for lst in (sepal_length_list, sepal_width_list,
                    petal_length_list, petal_width_list)
    ]

    # print results
    print_data(species_data, trait_data)
|  |  | ||||||
|  |  | ||||||
if __name__ == "__main__":
    import os.path
    # Checked explicitly instead of with ``assert``: assertions are removed
    # when Python runs with -O, which would silently skip this validation.
    if not os.path.isfile('data.csv'):
        raise SystemExit("data.csv not found in the current working directory")
    main()
		Reference in New Issue
	
	Block a user