xxxxxxxxxx
# One-hot encode the body_style and drive_wheels columns, using "body" and
# "drive" as the prefixes of the generated columns, then preview the result.
encode_cols = ["body_style", "drive_wheels"]
col_prefixes = ["body", "drive"]
encoded = pd.get_dummies(obj_df, columns=encode_cols, prefix=col_prefixes)
encoded.head()
xxxxxxxxxx
from sklearn import preprocessing
# Fit a label encoder on df['column'] and overwrite the column with the
# encoded values it returns.
lab_encoder = preprocessing.LabelEncoder()
encoded_column = lab_encoder.fit_transform(df['column'])
df['column'] = encoded_column
xxxxxxxxxx
from sklearn.preprocessing import LabelEncoder

# Collect every categorical (object-dtype) column in the dataframe.
catCols = [col for col in df1.columns if df1[col].dtype == "O"]

# Overwrite each categorical column with its label-encoded values.
# The same encoder is re-fitted on every column, so after the loop lb_make
# only holds the classes of the last column processed.
lb_make = LabelEncoder()
for item in catCols:
    df1[item] = lb_make.fit_transform(df1[item])
xxxxxxxxxx
# Replace the numeric Origin codes (1, 2, 3) with country names.
# NOTE(review): this line only renames the codes; it does not by itself split
# Origin into separate USA / Europe / Japan 0-1 columns. That would need a
# follow-up one-hot step such as pd.get_dummies on the Origin column.
origin_names = {1: 'USA', 2: 'Europe', 3: 'Japan'}
dataset['Origin'] = dataset['Origin'].map(origin_names)
xxxxxxxxxx
# Bucket the continuous Age column into four labelled age groups.
age_edges = [0, 2, 17, 65, 99]
age_groups = ['Toddler/Baby', 'Child', 'Adult', 'Elderly']
pd.cut(df.Age, bins=age_edges, labels=age_groups)
# where bins holds the cut-off points (bin edges) for the continuous data
# and the key thing here is that the number of labels is always one less than the number of bin edges
xxxxxxxxxx
# Convert body_style to the pandas 'category' dtype, then show the dtype of
# every column.
body_style = obj_df["body_style"]
obj_df["body_style"] = body_style.astype('category')
obj_df.dtypes
xxxxxxxxxx
## Converting Gender to a numeric (0/1) variable
# drop_first=True drops the first category's indicator column, so a
# two-category Gender yields a single column that can be assigned back.
# dtype=int makes the indicator 0/1; pandas >= 2.0 would otherwise return
# boolean dummy columns.
# NOTE(review): presumably Gender has exactly two categories; with more, the
# result has several columns and this single-column assignment fails.
df['Gender'] = pd.get_dummies(df['Gender'], drop_first=True, dtype=int)
df.head()
xxxxxxxxxx
import pandas as pd
# Example DataFrame with a 'smoker' column containing 'yes' and 'no'
data = {'smoker': ['yes', 'no', 'no', 'yes', 'yes', 'no']}
df = pd.DataFrame(data)
# Encode 'yes' as 1 and 'no' as 0 with Series.map, which returns an integer
# column directly. Series.replace on an object column relies on implicit
# downcasting of the result, which pandas 2.2 deprecates with a FutureWarning.
# Note: map() yields NaN for any value that is missing from the mapping.
df['smoker_encoded'] = df['smoker'].map({'yes': 1, 'no': 0})
print(df)
xxxxxxxxxx
# Categorical columns to label-encode in both the train and test frames.
cat_cols = ['Item_Identifier', 'Item_Fat_Content', 'Item_Type', 'Outlet_Identifier',
            'Outlet_Size', 'Outlet_Location_Type', 'Outlet_Type', 'Item_Type_Combined']
enc = LabelEncoder()
for col in cat_cols:
    # Cast both frames to str first so the encoder sees one uniform type.
    train[col] = train[col].astype('str')
    test[col] = test[col].astype('str')
    # Fit on train only, then reuse that mapping for test so both frames
    # share the same codes.
    # NOTE(review): LabelEncoder.transform raises ValueError for labels that
    # did not occur in train; confirm test contains no unseen categories.
    train[col] = enc.fit_transform(train[col])
    test[col] = enc.transform(test[col])