xxxxxxxxxx
In [21]: g1.add_suffix('_Count').reset_index()
Out[21]:
Name City City_Count Name_Count
0 Alice Seattle 1 1
1 Bob Seattle 2 2
2 Mallory Portland 2 2
3 Mallory Seattle 1 1
xxxxxxxxxx
# Collect the values of column "b" into one Python list per group of column "a".
# The frame has to be bound to `df`; otherwise the groupby line below has
# nothing to operate on and fails with a NameError.
df = pd.DataFrame({'a': ['A', 'A', 'B', 'B', 'B', 'C'], 'b': [1, 2, 5, 5, 4, 6]})
df.groupby('a')['b'].apply(list)
Out:
a
A [1, 2]
B [5, 5, 4]
C [6]
Name: b, dtype: object
xxxxxxxxxx
# Total of the "sales" column for each month number found in the "date" column.
# The key is the month number only, so identical months from different years
# land in the same group.
month_of_sale = df["date"].dt.month
df.groupby(month_of_sale)["sales"].sum()
date
1 34
2 44
3 31
Name: sales, dtype: int64
xxxxxxxxxx
# Several ways to spell an aggregation on grouped data.

# Way 1: group "another_col" by "col1" and "col2" and produce the min, max
# and sum of each group. The aggregations are given as strings instead of the
# Python builtins: pandas >= 2.1 emits a FutureWarning when handed the builtin
# min/max/sum callables, and the string names give the same column labels.
df.groupby(["col1", "col2"], as_index=False)["another_col"].agg(["min", "max", "sum"])

# Way 2: a dict maps each input column to the list of aggregations to apply.
df.groupby("cat_col").agg({"col1": ["mean", "std"], "col2": ["median"]})

# Way 3: named aggregation -- each keyword is the output column name and each
# value is an (input column, aggregation) pair.
books.groupby("some_col").agg(
    mean_col1=("col1", "mean"),
    std_col2=("col2", "std"),
    median_col3=("col3", "median"),
)

# Multi-index groupby: level=0 groups on the outermost index level.
df.groupby(level=0).agg({'col': 'mean'})

# Number of rows in each group.
df.groupby('col').size()
xxxxxxxxxx
# Usage example: gather several per-group statistics into one flat DataFrame.

# Build the sample frame first so the example runs from top to bottom:
# ten rows, two string key columns and four float value columns.
keys = np.array(
    [
        ["A", "B"],
        ["A", "B"],
        ["A", "B"],
        ["A", "B"],
        ["C", "D"],
        ["C", "D"],
        ["C", "D"],
        ["E", "F"],
        ["E", "F"],
        ["G", "H"],
    ]
)
values = np.random.randn(10, 4).round(2)
# Keys and values are assembled column-wise as separate frames so the floats
# never have to pass through a string array and be cast back afterwards.
df = pd.concat(
    [
        pd.DataFrame(keys, columns=["col1", "col2"]),
        pd.DataFrame(values, columns=["col3", "col4", "col5", "col6"]),
    ],
    axis=1,
)

gb = df.groupby(["col1", "col2"])

# Group sizes as a one-column frame; the other statistics are joined onto it
# through the shared (col1, col2) group index.
counts = gb.size().to_frame(name="counts")

summary = (
    counts.join(gb.agg({"col3": "mean"}).rename(columns={"col3": "col3_mean"}))
    .join(gb.agg({"col4": "median"}).rename(columns={"col4": "col4_median"}))
    .join(gb.agg({"col4": "min"}).rename(columns={"col4": "col4_min"}))
    .reset_index()  # turn the group keys back into ordinary columns
)
summary
xxxxxxxxxx
# car_sales is a DataFrame and "Make" is one of its columns.
# Group the rows by "Make" and take the mean of every other column per group.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0
# raises a TypeError when asked to average text columns, and the flag changes
# nothing when every remaining column is already numeric.
car_sales.groupby(["Make"]).mean(numeric_only=True)
xxxxxxxxxx
// Select only the "name" column of the User model, grouped by name, and
// convert the fetched result to an array.
$users = User::select('name')
    ->groupBy('name')
    ->get()
    ->toArray();