# --- Snippet: drop rows and columns by their share of missing values ---
import pandas as pd
import numpy as np
# Sample input: columns A and B each hold one missing value, column C is complete.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[1, 2, 3, np.nan, 5],
    C=[1, 2, 3, 4, 5],
)
df = pd.DataFrame(data)
def clean_dataframe(df, *, min_row_fill=0.8, min_col_fill=0.7):
    """Drop sparsely populated rows, then sparsely populated columns.

    Rows are filtered first. The column filter is then evaluated against the
    rows that survived, so its fraction is relative to the reduced row count.

    Args:
        df: DataFrame to clean. It is not modified; a new frame is returned.
        min_row_fill: Minimum fraction of non-NaN cells a row must have to be
            kept. The default of 0.8 drops every row with more than 20% NaN.
        min_col_fill: Minimum fraction of non-NaN cells a column must have to
            be kept. The default of 0.7 drops every column with more than
            30% NaN.

    Returns:
        A new DataFrame containing only the rows and columns that meet the
        fill requirements.
    """
    # dropna documents `thresh` as an integer count of required non-NaN
    # values, so round the fractional requirement up to a whole cell count.
    min_cells_per_row = int(np.ceil(min_row_fill * df.shape[1]))
    dense_rows = df.dropna(axis=0, thresh=min_cells_per_row)

    # The row count is taken after row filtering on purpose (see docstring).
    min_cells_per_col = int(np.ceil(min_col_fill * dense_rows.shape[0]))
    return dense_rows.dropna(axis=1, thresh=min_cells_per_col)
# Run the cleaning step and keep its result under a separate name so the
# input frame can still be printed next to it.
cleaned_df = clean_dataframe(df)

# Print each frame under its heading, input first and result second.
print("Original DataFrame:", df, sep="\n")
print("\nCleaned DataFrame:", cleaned_df, sep="\n")
# --- Snippet: basic dropna() calls ---
# Neither call stores its result: dropna() returns a new DataFrame and `df`
# itself is left as it was. Assign the result to keep it.
df.dropna(axis=0, how='any')  # drop every row that has at least one NaN value
df.dropna(axis=0, how='all')  # drop only the rows in which every value is NaN
# --- Snippet: drop rows that have NaN in one specific column ---
# Keep only the rows whose 'column' value is present. This rebinds `df` to a
# filtered DataFrame in memory; no file on disk is read or written.
df = df[df['column'].notna()]
# Drop the rows whose 'EPS' value is missing. With a single column in
# `subset`, how='all' and the default how='any' select the same rows, so the
# argument is omitted. The result is reassigned instead of using inplace=True
# because `df` was just produced by boolean filtering, where in-place changes
# can trigger pandas' SettingWithCopyWarning. As above, only the in-memory
# DataFrame changes; persisting it needs an explicit save (e.g. df.to_csv).
df = df.dropna(subset=['EPS'])
# --- Snippet: coerce strings to numbers, then drop the rows that failed to parse ---
import pandas as pd
# Build a frame of strings, turn every cell into a number (cells that cannot
# be parsed become NaN), discard rows containing any NaN, and renumber the
# remaining rows from 0.
df = (
    pd.DataFrame(
        {
            'values_1': ['700', 'ABC', '500', 'XYZ', '1200'],
            'values_2': ['DDD', '150', '350', '400', '5000'],
        }
    )
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
    .reset_index(drop=True)
)
print(df)