# ---------- Snippet 1 ----------
# Three-way split (train / validation / test) built from two calls to
# sklearn.model_selection.train_test_split: first carve off the test set,
# then split the remaining training data into train and validation.
# X and y are expected to be defined by the caller before this point.
from sklearn.model_selection import train_test_split

# First split: hold out 20% of all samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)
# Second split: take 25% of the remaining 80% as the validation set
# (0.25 x 0.8 = 0.2 of the original data), i.e. a 60/20/20 split overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=1
)
# ---------- Snippet 2 ----------
from sklearn.model_selection import train_test_split
# Single train/test split: 25% of the samples go to the test set, and the
# fixed random_state makes the shuffle repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
# ---------- Snippet 3 ----------
import numpy as np
# Split df into an 80% training frame and a 20% testing frame using a
# boolean row mask. df is expected to be defined by the caller and to
# contain the columns 'A', 'B', 'C' (features) and 'D' (target).

# Seeded generator so the split is reproducible from run to run, in line
# with the fixed random_state used for train_test_split.
rng = np.random.default_rng(42)

# Mark exactly round(0.8 * n) randomly chosen row positions as training rows.
# Drawing each row independently (rand() < 0.8) only gives 80% on average and
# can leave one side empty on small frames.
n_rows = len(df)
n_train = round(0.8 * n_rows)
mask = np.zeros(n_rows, dtype=bool)
mask[rng.permutation(n_rows)[:n_train]] = True

# Keep only the feature and target columns (row count is unchanged, so the
# mask still lines up with the frame).
df = df[['A', 'B', 'C', 'D']]

# Rows where the mask is True form the training data.
train_df = df[mask]
X_Train = train_df[['A', 'B', 'C']]
Y_Train = train_df['D']

# The negated mask selects the remaining rows as the testing data.
test_df = df[~mask]
X_Test = test_df[['A', 'B', 'C']]
Y_Test = test_df['D']