# CMU Coding Bootcamp
# at main 1.7 kB view raw
1import numpy 2import pandas as pd 3import matplotlib.pyplot as plt 4import seaborn as sns 5 6 7def loadCatsDatabase(): 8 df = pd.read_csv('./cats/cats_dataset.csv') 9 return df 10 11 12def avg_age(df: pd.DataFrame, breed: str|None = None) -> float: 13 new_df = df 14 if breed: 15 new_df = df[df["Breed"] == breed] 16 return numpy.round(new_df['Age (Years)'].astype(int).sum()/len(new_df['Age (Years)']), 2) 17 18def high_age_breed(df: pd.DataFrame) -> tuple[str, float]: 19 vals: list[tuple[str, float]] = [] 20 for breed in df["Breed"].unique(): 21 vals.append((breed, avg_age(df, str(breed)))) 22 highest = vals[0] 23 for (b, v) in vals[1:]: 24 if v > highest[1]: 25 highest = (b, v) 26 return highest 27 28def weight_avg(df: pd.DataFrame) -> float: 29 return numpy.round(df["Weight (kg)"].astype(float).sum()/len(df["Weight (kg)"]), 2) 30 31def gender_heavier(df: pd.DataFrame) -> str: 32 male_avg = weight_avg(df[df["Gender"] == "Male"]) 33 female_avg = weight_avg(df[df["Gender"] == "Female"]) 34 return "male" if male_avg > female_avg else "female" 35 36def correlation(df: pd.DataFrame) -> bool: 37 corr = df["Age (Years)"].corr(df["Weight (kg)"]) 38 return abs(corr) > 0.9 39 40def cat_age_histogram(df: pd.DataFrame) -> None: 41 ages = df["Age (Years)"].astype(int) 42 _ = sns.histplot(data=ages) 43 plt.show() 44 45print("Running tests...", end="") 46try: 47 df = loadCatsDatabase() 48 assert(avg_age(df) == 10.21) # Q1 49 assert(high_age_breed(df) == ("Himalayan", 11.67)) # Q2 50 assert(gender_heavier(df) == "female") # Q3 51 assert(not correlation(df)) # Q4 52 cat_age_histogram(df) 53 print("Passed!") 54except: 55 print("Failed :(")