xxxxxxxxxx
# Total of column B for every distinct (A, C) pair; as_index=False keeps
# A and C as ordinary columns instead of moving them into the index.
(
    df.groupby(by=["A", "C"], as_index=False)["B"]
    .sum()
)
xxxxxxxxxx
# Sum col3 and col4 within each (col1, col2) group, then turn the group
# keys back into regular columns with reset_index().
(
    df.groupby(by=["col1", "col2"])
    .agg({"col3": "sum", "col4": "sum"})
    .reset_index()
)
xxxxxxxxxx
# transform('sum') returns a result aligned with the original rows, so every
# row receives the total of sum_column for its own group_column value.
df["new_column"] = (
    df.groupby(by=["group_column"])["sum_column"]
    .transform("sum")
)
xxxxxxxxxx
# Apply a different aggregation to each column within every (att1, att2) group.
(
    df.groupby(by=["att1", "att2"])
    .agg(
        {
            "att1": "count",  # number of rows per group
            "att3": "sum",
            "att4": "mean",
        }
    )
)
xxxxxxxxxx
In [2]: df = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'],
   ...:                    'height': [9.1, 6.0, 9.5, 34.0],
   ...:                    'weight': [7.9, 7.5, 9.9, 198.0]})
   ...:
In [3]: df
Out[3]:
  kind  height  weight
0  cat     9.1     7.9
1  dog     6.0     7.5
2  cat     9.5     9.9
3  dog    34.0   198.0
In [4]: df.groupby('kind').agg(min_height=('height', 'min'),
   ...:                        max_weight=('weight', 'max'))
Out[4]:
      min_height  max_weight
kind
cat          9.1         9.9
dog          6.0       198.0
xxxxxxxxxx
# Updated June 2020: pandas 0.25.0 introduced "named aggregation" for groupby.
# Each keyword passed to .agg() names an output column, and its
# (column, aggfunc) tuple picks the input column and the function to apply,
# so the results of several aggregations on specific columns get explicit names.
(
    df.groupby(by=["col1", "col2"])
    .agg(
        sum_col3=("col3", "sum"),
        sum_col4=("col4", "sum"),
    )
    .reset_index()
)