xxxxxxxxxx
cat_cols = ['Item_Identifier', 'Item_Fat_Content', 'Item_Type', 'Outlet_Identifier',
'Outlet_Size', 'Outlet_Location_Type', 'Outlet_Type', 'Item_Type_Combined']
enc = LabelEncoder()
for col in cat_cols:
train[col] = train[col].astype('str')
test[col] = test[col].astype('str')
train[col] = enc.fit_transform(train[col])
test[col] = enc.transform(test[col])
xxxxxxxxxx
pd.cut(df.Age,bins=[0,2,17,65,99],labels=['Toddler/Baby','Child','Adult','Elderly'])
# where bins is cut off points of bins for the continuous data
# and key things here is that no. of labels is always less than 1
xxxxxxxxxx
obj_df["body_style"] = obj_df["body_style"].astype('category')
obj_df.dtypes
xxxxxxxxxx
pd.get_dummies(obj_df, columns=["body_style", "drive_wheels"], prefix=["body", "drive"]).head()