# Count the missing entries in each column
df.isna().sum(axis=0)
# Visualize missing data information
import missingno as msno
import matplotlib.pyplot as plt
# Draw the nullity matrix for `df`, the DataFrame used by every other
# statement in this file (the previous `airquality` name is not defined here).
msno.matrix(df)
plt.show()
# Keep only the rows whose 'col' entry is present
df_dropped = df.dropna(axis="index", how="any", subset=['col'])
# Replace/impute missing data in one column with a single value (its mean)
col_mean = df['col'].mean()
# The dict maps column label -> fill value. fillna applies dict/Series values
# column by column only; combining a dict with axis=1 raises
# NotImplementedError, so the axis argument is left at its default.
df_imputed = df.fillna(value={'col': col_mean})
# Replace/impute missing data with a series
series_imp = df['col1'] * 5
df_imputed = df.fillna({'col2': series_imp})
# Fill the gaps in col1 with the values from col2. The result is assigned
# back instead of calling fillna(inplace=True) on df["col1"]: that form is a
# chained in-place operation on an intermediate Series, which emits a warning
# in pandas 2.x and leaves df unchanged when Copy-on-Write is enabled.
df["col1"] = df["col1"].fillna(df["col2"])
# Missing values are not always encoded as NaN: they can be blank strings,
# "?" or (rarely) other placeholder symbols.
# Inspect the distinct values manually first and look out for suspicious ones.
col_values = df["col"]
col_values.value_counts()
# Flag which columns contain at least one missing value
df.isnull().any(axis=0)
# Count the missing values in a single column
col_values.isna().sum()
# Drop missing values
df.dropna(axis="index")  # Remove every row that has a missing value (default)
df.dropna(axis="columns")  # Remove every column that has a missing value
# Remove only the rows where one specific column is missing
df.dropna(axis="index", subset=["col"])
# Replace missing values
# numpy is imported here, next to its first use, because np.nan is the
# sentinel being replaced and no earlier statement brings `np` into scope.
import numpy as np

# new_val is the replacement value; it is expected to be defined by the
# surrounding code. Both calls return new objects and leave df unchanged.
df["col"].replace(np.nan, new_val)
df.fillna(0)