import pandas as pd
import numpy as np
# Sample input: columns 'A' and 'B' each hold one missing value (NaN),
# while column 'C' is fully populated.
data = {
    "A": [1, 2, np.nan, 4, 5],
    "B": [1, 2, 3, np.nan, 5],
    "C": [1, 2, 3, 4, 5],
}
df = pd.DataFrame(data=data)
def clean_dataframe(df, *, max_row_nan=0.8, max_col_nan=0.3):
    """Drop rows, then columns, whose share of missing values is too high.

    Rows are filtered first; the column filter is then evaluated on the
    rows that remain. The input frame is not modified.

    Args:
        df: The DataFrame to clean.
        max_row_nan: Largest fraction of NaN values (0..1) a row may have
            and still be kept. Rows with a strictly larger share are dropped.
        max_col_nan: Largest fraction of NaN values (0..1) a column may have
            and still be kept. Columns with a strictly larger share are
            dropped.

    Returns:
        A new DataFrame containing only the surviving rows and columns.

    Raises:
        ValueError: If either limit lies outside the range [0, 1].
    """
    for name, value in (("max_row_nan", max_row_nan), ("max_col_nan", max_col_nan)):
        if not 0 <= value <= 1:
            raise ValueError(f"{name} must be between 0 and 1, got {value!r}")

    # Measure the NaN share directly so each limit reads exactly as
    # documented ("more than X% NaN is dropped"). The mask is taken as a
    # plain array so selection is positional and does not depend on label
    # alignment. A share that is itself NaN (no cells to average) is never
    # "greater than" the limit, so such rows/columns are kept.
    drop_rows = (df.isna().mean(axis=1) > max_row_nan).to_numpy()
    df = df.loc[~drop_rows]

    # Column shares are computed on the already row-filtered frame.
    drop_cols = (df.isna().mean(axis=0) > max_col_nan).to_numpy()
    df = df.loc[:, ~drop_cols]
    return df
# Run the cleaning step on the sample frame; `df` itself is left as built.
cleaned_df = clean_dataframe(df)

# Show the input and the result one after the other, each under its own
# heading (heading and frame are separated by a newline).
print("Original DataFrame:", df, sep="\n")
print("\nCleaned DataFrame:", cleaned_df, sep="\n")