# EDA: describe missing and zero values in a DataFrame
def describe_missing_zeros_values(df: pd.DataFrame) -> pd.DataFrame:
    """Summarize missing and zero values for each column of a dataframe.

    Only columns that contain at least one missing value are reported.
    Rows of the result are sorted by the percentage of missing values,
    highest first, and numeric statistics are rounded to one decimal place.
    A two-line summary is also printed to stdout.

    Args:
        df (pd.DataFrame): Dataframe under analysis.

    Returns:
        pd.DataFrame: One row per column of ``df`` that has missing values,
        with the columns "Zero Values", "Missing Values",
        "% Missing Values", "Total Zero & Missing Values",
        "% Total Zero & Missing Values" and "Data Type".
    """
    n_rows = len(df)

    # Sum the boolean mask first and cast the counts afterwards: casting the
    # mask itself to int raises on nullable extension dtypes, where comparing
    # a missing entry yields <NA>. The sum skips those entries, so the final
    # cast never sees a missing value.
    zero_values = (df == 0).sum(axis=0).astype(int)
    missing_values = df.isnull().sum()
    missing_values_percent = missing_values * 100 / n_rows

    # `keys` names the resulting columns directly instead of renaming the
    # positional 0/1/2 labels afterwards.
    missing_zero_df = pd.concat(
        [zero_values, missing_values, missing_values_percent],
        axis=1,
        keys=["Zero Values", "Missing Values", "% Missing Values"],
    )
    missing_zero_df["Total Zero & Missing Values"] = (
        missing_zero_df["Zero Values"] + missing_zero_df["Missing Values"]
    )
    missing_zero_df["% Total Zero & Missing Values"] = (
        100 * missing_zero_df["Total Zero & Missing Values"] / n_rows
    )
    missing_zero_df["Data Type"] = df.dtypes

    # Keep only the columns of `df` that actually contain missing values.
    has_missing = missing_zero_df["Missing Values"] != 0
    missing_zero_df = (
        missing_zero_df[has_missing]
        .sort_values("% Missing Values", ascending=False)
        .round(1)
    )

    print(f"Your selected dataframe has {df.shape[0]} rows {df.shape[1]} columns.")
    print(f"There are {missing_zero_df.shape[0]} columns that have missing values.")
    return missing_zero_df
Scarlet Macaw