import numpy import pandas as pd import matplotlib.pyplot as plt import seaborn as sns def loadCatsDatabase(): df = pd.read_csv('./cats/cats_dataset.csv') return df def avg_age(df: pd.DataFrame, breed: str|None = None) -> float: new_df = df if breed: new_df = df[df["Breed"] == breed] return numpy.round(new_df['Age (Years)'].astype(int).sum()/len(new_df['Age (Years)']), 2) def high_age_breed(df: pd.DataFrame) -> tuple[str, float]: vals: list[tuple[str, float]] = [] for breed in df["Breed"].unique(): vals.append((breed, avg_age(df, str(breed)))) highest = vals[0] for (b, v) in vals[1:]: if v > highest[1]: highest = (b, v) return highest def weight_avg(df: pd.DataFrame) -> float: return numpy.round(df["Weight (kg)"].astype(float).sum()/len(df["Weight (kg)"]), 2) def gender_heavier(df: pd.DataFrame) -> str: male_avg = weight_avg(df[df["Gender"] == "Male"]) female_avg = weight_avg(df[df["Gender"] == "Female"]) return "male" if male_avg > female_avg else "female" def correlation(df: pd.DataFrame) -> bool: corr = df["Age (Years)"].corr(df["Weight (kg)"]) return abs(corr) > 0.9 def cat_age_histogram(df: pd.DataFrame) -> None: ages = df["Age (Years)"].astype(int) _ = sns.histplot(data=ages) plt.show() print("Running tests...", end="") try: df = loadCatsDatabase() assert(avg_age(df) == 10.21) # Q1 assert(high_age_breed(df) == ("Himalayan", 11.67)) # Q2 assert(gender_heavier(df) == "female") # Q3 assert(not correlation(df)) # Q4 cat_age_histogram(df) print("Passed!") except: print("Failed :(")