# --- Snippet 1: group-aware train/test split ---
# Group-aware hold-out split: the splitter receives df['Group_Id'] as the
# grouping key, so it partitions by group rather than by individual row.
# test_size=.20 requests roughly 20% of the groups for the test side and
# random_state=7 pins the shuffle so the split is repeatable.
group_splitter = GroupShuffleSplit(test_size=.20, n_splits=2, random_state=7)
# n_splits=2 is configured, but only the first (train, test) pair is consumed.
split_pairs = group_splitter.split(df, groups=df['Group_Id'])
train_inds, test_inds = next(split_pairs)
# The yielded indices are positional, hence .iloc rather than .loc.
train, test = df.iloc[train_inds], df.iloc[test_inds]
# --- Snippet 2: shuffle the rows, then cut at 80% ---
# Shuffle-then-cut hold-out split.
# sample(frac=1) returns every row of df in random order. No random_state is
# passed, so the ordering (and therefore the split) differs on every run.
df_permutated = df.sample(frac=1)
train_size = 0.8  # fraction of rows assigned to the training set
# int() truncates, so the training set gets floor(len * train_size) rows and
# the test set receives the remainder.
train_end = int(len(df_permutated) * train_size)
# Cut strictly by position with .iloc: plain df[a:b] slicing can be resolved
# against index labels for some index types (e.g. a float index), which would
# not produce the intended row counts.
df_train = df_permutated.iloc[:train_end]
df_test = df_permutated.iloc[train_end:]
# --- Snippet 3: random boolean mask split ---
import numpy as np
# Random-mask row split: every row independently draws a uniform number in
# [0, 1) and is marked for training when the draw is below 0.8. The training
# share is therefore about 80% on average, not exactly 80%.
mask = np.random.rand(len(df)) < 0.8
# Restrict the frame to the three input columns plus column 'D', which is the
# one assigned to Y_Train / Y_Test below.
feature_cols = ['A', 'B', 'C']
target_col = 'D'
df = df[feature_cols + [target_col]]
# Rows where the mask is True form the training data.
train_df = df[mask]
X_Train, Y_Train = train_df[feature_cols], train_df[target_col]
# The negated mask selects the remaining rows as testing data.
test_df = df[~mask]
X_Test, Y_Test = test_df[feature_cols], test_df[target_col]