zad 1
This commit is contained in:
commit
40f642966c
15
zad1/README.txt
Normal file
15
zad1/README.txt
Normal file
@ -0,0 +1,15 @@
|
||||
CHARAKTERYSTYKA GATUNKÓW IRYSÓW
|
||||
|
||||
Liczba klas (gatunków): 3
|
||||
|
||||
Liczba obserwacji: 150 (50 dla każdego gatunku)
|
||||
|
||||
Atrybuty (kolumny):
|
||||
1. długość działki kielicha (ang. sepal length) [cm]
|
||||
2. szerokość działki kielicha (ang. sepal width) [cm]
|
||||
3. długość płatka (ang. petal length) [cm]
|
||||
4. szerokość płatka (ang. petal width) [cm]
|
||||
5. gatunek (ang. species):
|
||||
0 - setosa
|
||||
1 - versicolor
|
||||
2 - virginica
|
150
zad1/data.csv
Normal file
150
zad1/data.csv
Normal file
@ -0,0 +1,150 @@
|
||||
5.1,3.5,1.4,0.2,0
|
||||
4.9,3.0,1.4,0.2,0
|
||||
4.7,3.2,1.3,0.2,0
|
||||
4.6,3.1,1.5,0.2,0
|
||||
5.0,3.6,1.4,0.2,0
|
||||
5.4,3.9,1.7,0.4,0
|
||||
4.6,3.4,1.4,0.3,0
|
||||
5.0,3.4,1.5,0.2,0
|
||||
4.4,2.9,1.4,0.2,0
|
||||
4.9,3.1,1.5,0.1,0
|
||||
5.4,3.7,1.5,0.2,0
|
||||
4.8,3.4,1.6,0.2,0
|
||||
4.8,3.0,1.4,0.1,0
|
||||
4.3,3.0,1.1,0.1,0
|
||||
5.8,4.0,1.2,0.2,0
|
||||
5.7,4.4,1.5,0.4,0
|
||||
5.4,3.9,1.3,0.4,0
|
||||
5.1,3.5,1.4,0.3,0
|
||||
5.7,3.8,1.7,0.3,0
|
||||
5.1,3.8,1.5,0.3,0
|
||||
5.4,3.4,1.7,0.2,0
|
||||
5.1,3.7,1.5,0.4,0
|
||||
4.6,3.6,1.0,0.2,0
|
||||
5.1,3.3,1.7,0.5,0
|
||||
4.8,3.4,1.9,0.2,0
|
||||
5.0,3.0,1.6,0.2,0
|
||||
5.0,3.4,1.6,0.4,0
|
||||
5.2,3.5,1.5,0.2,0
|
||||
5.2,3.4,1.4,0.2,0
|
||||
4.7,3.2,1.6,0.2,0
|
||||
4.8,3.1,1.6,0.2,0
|
||||
5.4,3.4,1.5,0.4,0
|
||||
5.2,4.1,1.5,0.1,0
|
||||
5.5,4.2,1.4,0.2,0
|
||||
4.9,3.1,1.5,0.2,0
|
||||
5.0,3.2,1.2,0.2,0
|
||||
5.5,3.5,1.3,0.2,0
|
||||
4.9,3.6,1.4,0.1,0
|
||||
4.4,3.0,1.3,0.2,0
|
||||
5.1,3.4,1.5,0.2,0
|
||||
5.0,3.5,1.3,0.3,0
|
||||
4.5,2.3,1.3,0.3,0
|
||||
4.4,3.2,1.3,0.2,0
|
||||
5.0,3.5,1.6,0.6,0
|
||||
5.1,3.8,1.9,0.4,0
|
||||
4.8,3.0,1.4,0.3,0
|
||||
5.1,3.8,1.6,0.2,0
|
||||
4.6,3.2,1.4,0.2,0
|
||||
5.3,3.7,1.5,0.2,0
|
||||
5.0,3.3,1.4,0.2,0
|
||||
7.0,3.2,4.7,1.4,1
|
||||
6.4,3.2,4.5,1.5,1
|
||||
6.9,3.1,4.9,1.5,1
|
||||
5.5,2.3,4.0,1.3,1
|
||||
6.5,2.8,4.6,1.5,1
|
||||
5.7,2.8,4.5,1.3,1
|
||||
6.3,3.3,4.7,1.6,1
|
||||
4.9,2.4,3.3,1.0,1
|
||||
6.6,2.9,4.6,1.3,1
|
||||
5.2,2.7,3.9,1.4,1
|
||||
5.0,2.0,3.5,1.0,1
|
||||
5.9,3.0,4.2,1.5,1
|
||||
6.0,2.2,4.0,1.0,1
|
||||
6.1,2.9,4.7,1.4,1
|
||||
5.6,2.9,3.6,1.3,1
|
||||
6.7,3.1,4.4,1.4,1
|
||||
5.6,3.0,4.5,1.5,1
|
||||
5.8,2.7,4.1,1.0,1
|
||||
6.2,2.2,4.5,1.5,1
|
||||
5.6,2.5,3.9,1.1,1
|
||||
5.9,3.2,4.8,1.8,1
|
||||
6.1,2.8,4.0,1.3,1
|
||||
6.3,2.5,4.9,1.5,1
|
||||
6.1,2.8,4.7,1.2,1
|
||||
6.4,2.9,4.3,1.3,1
|
||||
6.6,3.0,4.4,1.4,1
|
||||
6.8,2.8,4.8,1.4,1
|
||||
6.7,3.0,5.0,1.7,1
|
||||
6.0,2.9,4.5,1.5,1
|
||||
5.7,2.6,3.5,1.0,1
|
||||
5.5,2.4,3.8,1.1,1
|
||||
5.5,2.4,3.7,1.0,1
|
||||
5.8,2.7,3.9,1.2,1
|
||||
6.0,2.7,5.1,1.6,1
|
||||
5.4,3.0,4.5,1.5,1
|
||||
6.0,3.4,4.5,1.6,1
|
||||
6.7,3.1,4.7,1.5,1
|
||||
6.3,2.3,4.4,1.3,1
|
||||
5.6,3.0,4.1,1.3,1
|
||||
5.5,2.5,4.0,1.3,1
|
||||
5.5,2.6,4.4,1.2,1
|
||||
6.1,3.0,4.6,1.4,1
|
||||
5.8,2.6,4.0,1.2,1
|
||||
5.0,2.3,3.3,1.0,1
|
||||
5.6,2.7,4.2,1.3,1
|
||||
5.7,3.0,4.2,1.2,1
|
||||
5.7,2.9,4.2,1.3,1
|
||||
6.2,2.9,4.3,1.3,1
|
||||
5.1,2.5,3.0,1.1,1
|
||||
5.7,2.8,4.1,1.3,1
|
||||
6.3,3.3,6.0,2.5,2
|
||||
5.8,2.7,5.1,1.9,2
|
||||
7.1,3.0,5.9,2.1,2
|
||||
6.3,2.9,5.6,1.8,2
|
||||
6.5,3.0,5.8,2.2,2
|
||||
7.6,3.0,6.6,2.1,2
|
||||
4.9,2.5,4.5,1.7,2
|
||||
7.3,2.9,6.3,1.8,2
|
||||
6.7,2.5,5.8,1.8,2
|
||||
7.2,3.6,6.1,2.5,2
|
||||
6.5,3.2,5.1,2.0,2
|
||||
6.4,2.7,5.3,1.9,2
|
||||
6.8,3.0,5.5,2.1,2
|
||||
5.7,2.5,5.0,2.0,2
|
||||
5.8,2.8,5.1,2.4,2
|
||||
6.4,3.2,5.3,2.3,2
|
||||
6.5,3.0,5.5,1.8,2
|
||||
7.7,3.8,6.7,2.2,2
|
||||
7.7,2.6,6.9,2.3,2
|
||||
6.0,2.2,5.0,1.5,2
|
||||
6.9,3.2,5.7,2.3,2
|
||||
5.6,2.8,4.9,2.0,2
|
||||
7.7,2.8,6.7,2.0,2
|
||||
6.3,2.7,4.9,1.8,2
|
||||
6.7,3.3,5.7,2.1,2
|
||||
7.2,3.2,6.0,1.8,2
|
||||
6.2,2.8,4.8,1.8,2
|
||||
6.1,3.0,4.9,1.8,2
|
||||
6.4,2.8,5.6,2.1,2
|
||||
7.2,3.0,5.8,1.6,2
|
||||
7.4,2.8,6.1,1.9,2
|
||||
7.9,3.8,6.4,2.0,2
|
||||
6.4,2.8,5.6,2.2,2
|
||||
6.3,2.8,5.1,1.5,2
|
||||
6.1,2.6,5.6,1.4,2
|
||||
7.7,3.0,6.1,2.3,2
|
||||
6.3,3.4,5.6,2.4,2
|
||||
6.4,3.1,5.5,1.8,2
|
||||
6.0,3.0,4.8,1.8,2
|
||||
6.9,3.1,5.4,2.1,2
|
||||
6.7,3.1,5.6,2.4,2
|
||||
6.9,3.1,5.1,2.3,2
|
||||
5.8,2.7,5.1,1.9,2
|
||||
6.8,3.2,5.9,2.3,2
|
||||
6.7,3.3,5.7,2.5,2
|
||||
6.7,3.0,5.2,2.3,2
|
||||
6.3,2.5,5.0,1.9,2
|
||||
6.5,3.0,5.2,2.0,2
|
||||
6.2,3.4,5.4,2.3,2
|
||||
5.9,3.0,5.1,1.8,2
|
|
207
zad1/main.py
Normal file
207
zad1/main.py
Normal file
@ -0,0 +1,207 @@
|
||||
"""
|
||||
Komputerowa analiza danych
|
||||
Zadanie 1
|
||||
Michał Leśniak 195642
|
||||
"""
|
||||
import math
|
||||
import locale
|
||||
locale.setlocale(locale.LC_ALL, '')
|
||||
|
||||
# Display name for each species code; the codes are the values found in the
# last field of every data row and the keys of the species counter.
SPECIES_NAMES = {
    '0': 'setosa',
    '1': 'versicolor',
    '2': 'virginica',
}
|
||||
|
||||
# Row labels of the trait statistics table; entry i labels the i-th trait.
ATTRIBUTE_NAMES = [
    'Długość działki kielicha [cm]',
    'Szerokość działki kielicha [cm]',
    'Długość płatka [cm]',
    'Szerokość płatka [cm]',
]
|
||||
|
||||
|
||||
def mean(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: Non-empty sequence of numbers.

    Returns:
        The sum of the elements divided by their count.

    Raises:
        ZeroDivisionError: If ``lst`` is empty.
    """
    # The built-in sum() replaces a hand-written accumulator that was itself
    # named ``sum`` and therefore hid the built-in inside this function.
    return sum(lst) / len(lst)
|
||||
|
||||
|
||||
def median(lst, is_even=None):
    """Return the median of an already sorted sequence.

    Args:
        lst: Non-empty sequence of numbers in ascending order. The function
            only indexes into it; it does not sort.
        is_even: Whether ``lst`` has an even number of elements. When left
            as ``None`` it is derived from ``len(lst)``, which avoids passing
            a flag that does not match the list.

    Returns:
        The middle element for an odd-length list, or the average of the two
        middle elements for an even-length list.
    """
    if is_even is None:
        is_even = len(lst) % 2 == 0

    mid = len(lst) // 2
    if is_even:
        return (lst[mid - 1] + lst[mid]) / 2
    return lst[mid]
|
||||
|
||||
|
||||
def sample_standard_deviation(lst, lst_mean):
    """Return the sample standard deviation (n - 1 denominator) of ``lst``.

    Args:
        lst: Sequence of numbers.
        lst_mean: Arithmetic mean of ``lst``, supplied by the caller so it is
            not recomputed here.

    Returns:
        The square root of the sum of squared deviations from ``lst_mean``
        divided by ``len(lst) - 1``.

    Raises:
        ZeroDivisionError: If ``lst`` has exactly one element.
    """
    # A generator fed to the built-in sum() avoids both the temporary list
    # and a local variable that would hide the built-in ``sum``.
    squared_deviations = sum((value - lst_mean) ** 2 for value in lst)
    return math.sqrt(squared_deviations / (len(lst) - 1))
|
||||
|
||||
|
||||
def q1(lst, is_even, lst_median):
    """Return the first quartile of an already sorted list.

    The quartile is the median of the lower half of ``lst``:

    * odd length: the elements up to and including the middle element;
    * even length: the first ``len(lst) // 2 - 1`` elements followed by
      ``lst_median``.

    NOTE(review): for an even-length input the element just below the middle
    is replaced by the median value rather than kept - confirm this is the
    intended quartile convention.

    Args:
        lst: List of numbers in ascending order. It is not modified.
        is_even: Whether ``lst`` has an even number of elements.
        lst_median: Median of ``lst``.

    Returns:
        The median of the lower half described above.
    """
    half_size = len(lst) // 2
    if is_even:
        lower_half = lst[:half_size - 1]
        lower_half.append(lst_median)
    else:
        lower_half = lst[:half_size + 1]

    # Parity must come from the half itself: its length often differs in
    # parity from the full list, and reusing ``is_even`` would then pick the
    # wrong middle element(s).
    mid = len(lower_half) // 2
    if len(lower_half) % 2 == 0:
        return (lower_half[mid - 1] + lower_half[mid]) / 2
    return lower_half[mid]
|
||||
|
||||
|
||||
def q3(lst, is_even, lst_median):
    """Return the third quartile of an already sorted list.

    The quartile is the median of the upper half of ``lst``:

    * odd length: the elements from the middle element to the end;
    * even length: ``lst_median`` followed by the elements after index
      ``len(lst) // 2``.

    NOTE(review): for an even-length input the element just above the middle
    is replaced by the median value rather than kept - confirm this is the
    intended quartile convention.

    Args:
        lst: List of numbers in ascending order. It is not modified.
        is_even: Whether ``lst`` has an even number of elements.
        lst_median: Median of ``lst``.

    Returns:
        The median of the upper half described above.
    """
    half_size = len(lst) // 2
    if is_even:
        upper_half = [lst_median]
        upper_half.extend(lst[half_size + 1:])
    else:
        upper_half = lst[half_size:]

    # Parity must come from the half itself: its length often differs in
    # parity from the full list, and reusing ``is_even`` would then pick the
    # wrong middle element(s).
    mid = len(upper_half) // 2
    if len(upper_half) % 2 == 0:
        return (upper_half[mid - 1] + upper_half[mid]) / 2
    return upper_half[mid]
|
||||
|
||||
|
||||
def min(lst):
    """Return the smallest element of ``lst``.

    The result does not depend on the order of the elements, so the function
    is correct for unsorted input as well as for sorted input.

    Args:
        lst: Non-empty iterable of mutually comparable values.

    Returns:
        The smallest element.

    Raises:
        ValueError: If ``lst`` is empty.
    """
    # This definition hides the built-in min() for the whole module, so the
    # built-in is reached explicitly through the ``builtins`` module.
    import builtins

    return builtins.min(lst)
|
||||
|
||||
|
||||
def max(lst):
    """Return the largest element of ``lst``.

    The result does not depend on the order of the elements, so the function
    is correct for unsorted input as well as for sorted input.

    Args:
        lst: Non-empty iterable of mutually comparable values.

    Returns:
        The largest element.

    Raises:
        ValueError: If ``lst`` is empty.
    """
    # This definition hides the built-in max() for the whole module, so the
    # built-in is reached explicitly through the ``builtins`` module.
    import builtins

    return builtins.max(lst)
|
||||
|
||||
|
||||
def calc_data(lst):
    """Compute the summary statistics of one numeric trait.

    Args:
        lst: Non-empty sequence of numbers, in any order. It is not modified.

    Returns:
        A 7-tuple ``(minimum, mean, sample standard deviation, median,
        first quartile, third quartile, maximum)``.
    """
    # median, q1 and q3 pick elements by position, so they are only
    # meaningful on ascending data. Sorting a copy here removes the hidden
    # requirement that every caller pre-sorts the list.
    ordered = sorted(lst)

    is_even = len(ordered) % 2 == 0
    lst_mean = mean(ordered)
    lst_median = median(ordered, is_even)

    return (
        min(ordered),
        lst_mean,
        sample_standard_deviation(ordered, lst_mean),
        lst_median,
        q1(ordered, is_even, lst_median),
        q3(ordered, is_even, lst_median),
        max(ordered),
    )
|
||||
|
||||
|
||||
def calc_species_data(species):
    """Turn per-species counts into rows of (name, count, fraction).

    Args:
        species: Mapping from species code (a key of ``SPECIES_NAMES``) to
            the number of observations of that species.

    Returns:
        A list with one ``(display name, count, fraction of total)`` tuple
        per entry of ``species``, in the mapping's iteration order, followed
        by a closing ``("Razem", total, 1.0)`` row.

    Raises:
        KeyError: If a species code is not a key of ``SPECIES_NAMES``.
    """
    total = sum(species.values())

    species_list = [
        # With no observations at all every share is reported as 0.0 instead
        # of dividing by zero.
        (SPECIES_NAMES[key], count, count / total if total else 0.0)
        for key, count in species.items()
    ]
    species_list.append(("Razem", total, 1.0))

    return species_list
|
||||
|
||||
|
||||
def percentage_format(x):
    """Format a fraction as a percentage with one decimal place.

    Args:
        x: Fraction to format; it is multiplied by 100 before formatting.

    Returns:
        The text produced by ``locale.format_string`` for the scaled value,
        followed by a percent sign.
    """
    percent = x * 100
    return locale.format_string('%.1f%%', percent)
|
||||
|
||||
|
||||
def float_format(x):
    """Format a number with two decimal places.

    Args:
        x: Number to format.

    Returns:
        The text produced by ``locale.format_string`` with the ``%.2f``
        pattern.
    """
    pattern = '%.2f'
    return locale.format_string(pattern, x)
|
||||
|
||||
|
||||
def print_table(table):
    """Print ``table`` to standard output as a plain-text table.

    The first row is the header: all its cells are centred and it is framed
    by ``=`` rules. In every other row the first cell is left-aligned and the
    remaining cells are centred. An empty row (``None``) is printed as a
    ``-`` rule. A closing ``=`` rule ends the table. Nothing is printed for
    an empty table.

    Args:
        table: Sequence of rows. Each row is a sequence of strings with as
            many cells as the header, or ``None`` for a separator rule.
    """
    if not table:
        return

    column_count = len(table[0])

    # Column width = longest cell of that column. Comparisons are written out
    # by hand because this module defines its own max(), which is not the
    # built-in.
    cell_sizes = [0] * column_count
    for row in table:
        if not row:
            continue  # separator rows carry no cells
        for i in range(column_count):
            if len(row[i]) > cell_sizes[i]:
                cell_sizes[i] = len(row[i])

    header_row_format = ' | '.join(f'{{:^{size}}}' for size in cell_sizes)
    # Joining all columns (rather than appending ' | ' after the first one)
    # keeps a single-column table free of a dangling separator.
    row_format = ' | '.join(
        [f'{{:<{cell_sizes[0]}}}']
        + [f'{{:^{size}}}' for size in cell_sizes[1:]]
    )

    lines = []
    for index, row in enumerate(table):
        if index == 0:
            # The header is recognised by position, so a data row that happens
            # to equal the header is still formatted as data.
            lines.append(header_row_format.format(*row))
        elif not row:
            lines.append(None)
        else:
            lines.append(row_format.format(*row))

    row_size = 0
    for line in lines:
        if line is not None and len(line) > row_size:
            row_size = len(line)
    row_size += 1  # rules are one character wider than the widest text line

    heavy_rule = '=' * row_size
    light_rule = '-' * row_size

    output = [heavy_rule, lines[0], heavy_rule]
    output.extend(light_rule if line is None else line for line in lines[1:])
    output.append(heavy_rule)

    for line in output:
        print(line)
|
||||
|
||||
|
||||
def print_data(species_data, trait_data):
    """Print the species summary table and the trait statistics table.

    Args:
        species_data: Sequence of rows indexed as ``[0]`` name, ``[1]`` count
            and ``[2]`` fraction of the total. A separator rule is inserted
            before every row that compares equal to the last row.
        trait_data: Sequence of rows indexed as ``[0]`` minimum, ``[1]``
            mean, ``[2]`` standard deviation, ``[3]`` median, ``[4]`` first
            quartile, ``[5]`` third quartile and ``[6]`` maximum. Row ``i``
            is labelled with ``ATTRIBUTE_NAMES[i]``.
    """
    species_rows = [("Gatunek", "Liczebność (%)")]
    for entry in species_data:
        if entry == species_data[-1]:
            species_rows.append(None)
        name = entry[0]
        count_with_share = "{} ({})".format(
            entry[1], percentage_format(entry[2]))
        species_rows.append((name, count_with_share))

    trait_rows = [
        ("Cecha", "Minimum", "Śr. arytm. (± odch. stand.)", "Mediana (Q1 - Q3)", "Maksimum"),
    ]
    for index, stats in enumerate(trait_data):
        label = ATTRIBUTE_NAMES[index]
        lowest = float_format(stats[0])
        mean_with_deviation = "{} (±{})".format(
            float_format(stats[1]), float_format(stats[2]))
        median_with_quartiles = "{} ({} - {})".format(
            float_format(stats[3]),
            float_format(stats[4]),
            float_format(stats[5]))
        highest = float_format(stats[6])
        trait_rows.append(
            (label, lowest, mean_with_deviation, median_with_quartiles, highest))

    print_table(species_rows)
    print()
    print_table(trait_rows)
|
||||
|
||||
|
||||
def main(path="data.csv"):
    """Read the measurements, compute the statistics and print both tables.

    Every non-blank line of the input must hold five comma-separated fields:
    four numeric measurements followed by a species code, which has to be
    one of ``'0'``, ``'1'`` or ``'2'``.

    Args:
        path: Location of the CSV file. Defaults to ``"data.csv"``, resolved
            against the current working directory.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a line does not have exactly five fields or a
            measurement is not a number.
        KeyError: If a species code is not one of the expected codes.
    """
    sepal_length_list = []
    sepal_width_list = []
    petal_length_list = []
    petal_width_list = []
    species_dict = {
        '0': 0,
        '1': 0,
        '2': 0,
    }

    # read and parse data
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # A blank line (typically a trailing one) carries no record and
            # would otherwise break the five-way unpacking below.
            if not line:
                continue
            sepal_length, sepal_width, petal_length, petal_width, species = \
                line.split(',')
            sepal_length_list.append(float(sepal_length))
            sepal_width_list.append(float(sepal_width))
            petal_length_list.append(float(petal_length))
            petal_width_list.append(float(petal_width))
            species_dict[species.strip()] += 1

    trait_lists = [sepal_length_list, sepal_width_list,
                   petal_length_list, petal_width_list]
    # The statistics helpers pick elements by position, so each trait is
    # sorted before it is summarised.
    for lst in trait_lists:
        lst.sort()

    # calculate results
    species_data = calc_species_data(species_dict)
    trait_data = [calc_data(lst) for lst in trait_lists]

    # print results
    print_data(species_data, trait_data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import os.path
    import sys

    # An explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would skip the validation entirely.
    if not os.path.isfile('data.csv'):
        sys.exit("data.csv not found in the current working directory")
    main()
|
Loading…
Reference in New Issue
Block a user