xxxxxxxxxx
# Remove columns whose label repeats an earlier one; the first column
# carrying each label is the one that is kept.
df = df.loc[:, ~df.columns.duplicated(keep="first")]
xxxxxxxxxx
import pandas as pd
# Remove rows that are identical across every column
df = df.drop_duplicates(subset=None)
# Keep a single row for each distinct value found in "column"
df = df.drop_duplicates(subset=["column"])
# Keep a single row for each distinct ("column", "column2") pair
df = df.drop_duplicates(subset=["column", "column2"])
# Show the resulting DataFrame
print(df)
xxxxxxxxxx
import pandas as pd
# Load the employee records from the CSV file
data = pd.read_csv("employees.csv")
# Order the rows by first name, then discard every row whose first name
# occurs more than once (keep=False retains none of the duplicates)
data = data.sort_values("First Name").drop_duplicates(subset="First Name", keep=False)
# Show the remaining rows
print(data)
xxxxxxxxxx
# Quick examples of DataFrame.drop_duplicates()
# Default call: the first row of each group of duplicates is kept
df2 = df.drop_duplicates()
# Same result, with the default keep policy written out
df2 = df.drop_duplicates(keep="first")
# Keep the last row of each group of duplicates instead
df2 = df.drop_duplicates(keep="last")
# Discard every row that has a duplicate, keeping none of them
df2 = df.drop_duplicates(keep=False)
# Same, but rows are compared on the "Courses" and "Fee" columns only
df2 = df.drop_duplicates(subset=["Courses", "Fee"], keep=False)
# Deduplicate df itself rather than producing a new DataFrame
df.drop_duplicates(inplace=True)
# Case-insensitive variant: every column is converted to lower-case
# strings before comparing rows on "Courses" and "Fee"
df2 = (
    df.apply(lambda col: col.astype(str).str.lower())
    .drop_duplicates(subset=["Courses", "Fee"], keep="first")
)
# Remove duplicates in pandas ("borrar duplicados pandas")
xxxxxxxxxx
# Quick examples of DataFrame.drop_duplicates()
# Default call: the first row of each group of duplicates is kept
df2 = df.drop_duplicates()
# Same result, with the default keep policy written out
df2 = df.drop_duplicates(keep="first")
# Keep the last row of each group of duplicates instead
df2 = df.drop_duplicates(keep="last")
# Discard every row that has a duplicate, keeping none of them
df2 = df.drop_duplicates(keep=False)
# Same, but rows are compared on the "Courses" and "Fee" columns only
df2 = df.drop_duplicates(subset=["Courses", "Fee"], keep=False)
# Deduplicate df itself rather than producing a new DataFrame
df.drop_duplicates(inplace=True)
# Case-insensitive variant: every column is converted to lower-case
# strings before comparing rows on "Courses" and "Fee"
df2 = (
    df.apply(lambda col: col.astype(str).str.lower())
    .drop_duplicates(subset=["Courses", "Fee"], keep="first")
)
xxxxxxxxxx
# Drop columns whose values duplicate an earlier column.
# The transpose is used only to build the duplicate mask; the surviving
# columns are then selected from df itself, so they keep their original
# dtypes instead of being upcast to object by a T ... T round-trip.
# The mask is applied positionally (to_numpy) so that repeated column
# labels cannot interfere with the selection.
df2 = df.loc[:, ~df.T.duplicated().to_numpy()]
print(df2)
xxxxxxxxxx
# Signature reference for DataFrame.drop_duplicates, written with its defaults:
#   subset=None   -> rows are compared across all columns
#   keep='first'  -> the first occurrence of each duplicate is retained
#   inplace=False -> a new DataFrame is returned; the caller is not modified
dataFrame.drop_duplicates(subset=None, keep='first', inplace=False)