xxxxxxxxxx
# Keep only the first row seen for each index label; rows whose index
# label repeats an earlier one are filtered out by the inverted mask.
df3 = df3.loc[~df3.index.duplicated(keep='first')]
xxxxxxxxxx
import pandas as pd
# Remove rows that repeat an earlier row across every column
# (with the default keep='first', the first occurrence is retained).
df = df.drop_duplicates()
# Treat rows as repeats when they share the same value in "column".
df = df.drop_duplicates(subset=["column"])
# Treat rows as repeats when the ("column", "column2") pair matches.
df = df.drop_duplicates(subset=["column", "column2"])
# Show the resulting DataFrame.
print(df)
xxxxxxxxxx
# Remove rows whose index label repeats an earlier one, keeping the first
# occurrence. Index.duplicated(keep='first') marks the repeats as True, so
# the mask must be inverted with ~ to select the rows that are NOT repeats.
df = df[~df.index.duplicated(keep='first')]
# Alternatives that compare row values instead of index labels
import pandas as pd
df = df.drop_duplicates()  # compare entire rows
df = df.drop_duplicates(subset=["column"])  # compare a single column
df = df.drop_duplicates(subset=["column", "column2"])  # compare a pair of columns
# borrar duplicados pandas (Spanish: "remove duplicates in pandas")
xxxxxxxxxx
# Quick reference: variations of DataFrame.drop_duplicates()
# Default call: the first row of each group of duplicates is kept
df2 = df.drop_duplicates()
# Same behaviour with the keep argument spelled out
df2 = df.drop_duplicates(keep="first")
# Keep the last row of each group of duplicates instead
df2 = df.drop_duplicates(keep="last")
# keep=False discards every row that has a duplicate
df2 = df.drop_duplicates(keep=False)
# Compare only "Courses" and "Fee"; discard every row in a duplicated group
df2 = df.drop_duplicates(subset=["Courses", "Fee"], keep=False)
# Modify df itself rather than returning a new DataFrame
df.drop_duplicates(inplace=True)
# Convert every column to lower-cased strings first so the comparison on
# "Courses"/"Fee" ignores case; df2 holds the lower-cased string values
df2 = df.apply(lambda col: col.astype(str).str.lower()).drop_duplicates(
    subset=["Courses", "Fee"], keep="first"
)
xxxxxxxxxx
# Index.drop_duplicates() examples; the comment under each call shows the
# values of the returned Index.
idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
# keep='first': the first occurrence of each value is retained
idx.drop_duplicates(keep='first')
# Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
# keep='last': the last occurrence of each value is retained
idx.drop_duplicates(keep='last')
# Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
# keep=False must be the boolean, not the string 'False' (which raises
# ValueError); every value that occurs more than once is removed entirely
idx.drop_duplicates(keep=False)
# Index(['cow', 'beetle', 'hippo'], dtype='object')
xxxxxxxxxx
# DataFrame.drop_duplicates() called with each keyword written out explicitly
# (per the pandas documentation these are the default values).
dataFrame.drop_duplicates(
    subset=None,    # no column restriction: compare all columns
    keep='first',   # retain the first occurrence of each duplicate
    inplace=False,  # return a new DataFrame instead of modifying dataFrame
)