# CMU Coding Bootcamp
1import numpy
2import pandas as pd
3import matplotlib.pyplot as plt
4import seaborn as sns
5
6
def loadCatsDatabase():
    """Read the cats dataset CSV into a DataFrame.

    The file is read from ``./cats/cats_dataset.csv``, a path relative to
    the current working directory.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv('./cats/cats_dataset.csv')
10
11
12def avg_age(df: pd.DataFrame, breed: str|None = None) -> float:
13 new_df = df
14 if breed:
15 new_df = df[df["Breed"] == breed]
16 return numpy.round(new_df['Age (Years)'].astype(int).sum()/len(new_df['Age (Years)']), 2)
17
def high_age_breed(df: pd.DataFrame) -> tuple[str, float]:
    """Return the breed with the highest mean age together with that mean.

    Per-breed means use the same int cast and two-decimal rounding that
    ``avg_age`` applies, and are compared after rounding. Rows whose
    "Breed" is missing are ignored.

    Args:
        df: Table with "Breed" and "Age (Years)" columns.

    Returns:
        ``(breed, mean_age)`` for the breed with the largest rounded mean.
        When several breeds tie, the one appearing first in ``df`` wins.

    Raises:
        IndexError: If ``df`` has no rows with a breed.
    """
    ages = df["Age (Years)"].astype(int)
    # One grouped pass instead of re-filtering the frame once per breed.
    # sort=False keeps breeds in order of first appearance, so idxmax (which
    # returns the first maximum) resolves ties to the earliest breed.
    mean_by_breed = ages.groupby(df["Breed"], sort=False).mean().round(2)
    if mean_by_breed.empty:
        # Same exception type the original raised on empty input, but with
        # a message that says what went wrong.
        raise IndexError("cannot pick the oldest breed: no rows with a breed")
    top_breed = mean_by_breed.idxmax()
    return top_breed, float(mean_by_breed.loc[top_breed])
27
def weight_avg(df: pd.DataFrame) -> float:
    """Return the mean of the "Weight (kg)" column, rounded to two decimals.

    Args:
        df: Table with a "Weight (kg)" column whose values can be cast to
            float.

    Returns:
        The rounded mean weight, or ``nan`` when there is no weight to
        average.
    """
    weights = df["Weight (kg)"].astype(float)
    # mean() divides by the number of non-missing weights. The previous
    # sum()/len() skipped NaN in the numerator but still counted it in the
    # denominator, which dragged the average down.
    return float(numpy.round(weights.mean(), 2))
30
def gender_heavier(df: pd.DataFrame) -> str:
    """Report which gender has the larger average weight.

    Rows are split on the "Gender" column values "Male" and "Female", and
    each subset is averaged with ``weight_avg``.

    Returns:
        "male" only when the male average is strictly greater; otherwise
        "female" (this includes ties).
    """
    avg_by_gender = {
        label: weight_avg(df[df["Gender"] == label])
        for label in ("Male", "Female")
    }
    if avg_by_gender["Male"] > avg_by_gender["Female"]:
        return "male"
    return "female"
35
def correlation(df: pd.DataFrame) -> bool:
    """Tell whether age and weight are strongly correlated.

    The coefficient comes from ``Series.corr`` between the "Age (Years)"
    and "Weight (kg)" columns. The result is true when its absolute value
    exceeds 0.9, so strong negative correlation counts as well.
    """
    strong_threshold = 0.9
    ages = df["Age (Years)"]
    weights = df["Weight (kg)"]
    strength = abs(ages.corr(weights))
    return strength > strong_threshold
39
def cat_age_histogram(df: pd.DataFrame) -> None:
    """Draw a histogram of the "Age (Years)" column and display it.

    Ages are cast to int, passed to ``sns.histplot``, and the figure is
    shown with ``plt.show()``.
    """
    sns.histplot(data=df["Age (Years)"].astype(int))
    plt.show()
44
# Self-check: load the dataset and verify the expected answers to Q1-Q4,
# then show the age histogram.
print("Running tests...", end="")
try:
    df = loadCatsDatabase()
    assert avg_age(df) == 10.21, "Q1: overall average age"
    assert high_age_breed(df) == ("Himalayan", 11.67), "Q2: breed with the highest average age"
    assert gender_heavier(df) == "female", "Q3: heavier gender"
    assert not correlation(df), "Q4: age/weight correlation"
    cat_age_histogram(df)
except Exception as exc:
    # Catch Exception rather than using a bare except so Ctrl-C and
    # SystemExit still propagate, and report which check or error failed.
    print("Failed :(")
    print(f"{type(exc).__name__}: {exc}")
else:
    print("Passed!")