xxxxxxxxxx
# Fraction of the rows that goes to the training set.
train_size = 0.8
# Sampling with frac=1 returns every row of df in random order.
df_permutated = df.sample(frac=1)
# Row position at which the shuffled frame is cut (truncated to an int).
train_end = int(df_permutated.shape[0] * train_size)
# Rows before the cut are the training set, the remainder is the test set.
df_train, df_test = df_permutated[:train_end], df_permutated[train_end:]
xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Separate the target from the features. pop() removes 'output' from df in
# place, so X (the same object as df) holds only the feature columns afterwards.
y = df.pop('output')
X = df
# Split the index *labels* (not the rows themselves) together with the target.
X_train, X_test, y_train, y_test = train_test_split(X.index, y, test_size=0.2)
# X_train holds index labels, so rows must be selected by label with .loc.
# .iloc is positional and would pick the wrong rows (or raise IndexError)
# whenever the index is not exactly 0..n-1, e.g. after filtering or sorting.
X.loc[X_train]  # return dataframe train
xxxxxxxxxx
# Draw 80% of the rows at random; the fixed seed (200) makes the draw repeatable.
train = df.sample(random_state=200, frac=0.8)
# The test set is whatever is left once the sampled index labels are removed.
test = df.drop(index=train.index)
xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Split df row-wise, holding out 20% of the rows as the test set. No
# random_state is passed, so the split is not pinned to a fixed seed.
train, test = train_test_split(df, test_size=0.2)
xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Split X and y together, holding out 25% as the test set; random_state=42
# fixes the seed. X and y are defined outside this snippet -- presumably the
# feature matrix and the matching target (TODO confirm against the caller).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
xxxxxxxxxx
from sklearn.model_selection import GroupShuffleSplit

# Group-aware split: the split is driven by df['Group_Id'], so rows that share
# a Group_Id are kept on the same side. Only the first split is consumed via
# next(), so a single split is requested; random_state=7 fixes the seed.
splitter = GroupShuffleSplit(test_size=.20, n_splits=1, random_state=7)
train_inds, test_inds = next(splitter.split(df, groups=df['Group_Id']))
# split() yields positional row indices, hence .iloc rather than .loc.
train = df.iloc[train_inds]
test = df.iloc[test_inds]
xxxxxxxxxx
# Dataframe splitting helper function
def SplitDataframe(df, y_column, test_size=3):
    """Split ``df`` into train/test feature and target sets without shuffling.

    The leading rows of ``df`` become the test set and the remaining rows the
    training set; row order is preserved.

    Args:
        df: DataFrame holding the feature columns and the target column.
        y_column: Name of the target column.
        test_size: Size of the test set in tenths of ``df`` (the default of 3
            holds out 30% of the rows).

    Returns:
        ``((train_X, train_y), (test_X, test_y))`` where the ``X`` parts are
        ``df`` without ``y_column`` and the ``y`` parts are that column.
    """
    # The held-out count must scale with the number of rows. The earlier
    # expression (test_size*10/len(df)*100) shrank as the frame grew, matched
    # the intended share only for 100-row frames, and divided by zero on an
    # empty frame.
    test_count = int(round(len(df) * test_size / 10))
    # Positional slicing keeps the split independent of the index labels.
    test_ds = df.iloc[:test_count]
    train_ds = df.iloc[test_count:]
    train_ds_X = train_ds.drop([y_column], axis=1)
    train_ds_y = train_ds[y_column]
    test_ds_X = test_ds.drop([y_column], axis=1)
    test_ds_y = test_ds[y_column]
    return (train_ds_X, train_ds_y), (test_ds_X, test_ds_y)
xxxxxxxxxx
import numpy as np
# Boolean row mask: one uniform draw per row, True when the draw is below 0.8,
# so roughly (not exactly) 80% of the rows are marked for training
mask = np.random.rand(len(df)) < 0.8
# Keep only the feature columns A-C and the target column D
df = df[['A','B','C','D']]
# Rows where the mask is True form the training data
train_df = df.loc[mask]
Y_Train = train_df['D']
X_Train = train_df.loc[:, ['A','B','C']]
# Rows where the mask is False form the testing data
test_df = df.loc[~mask]
Y_Test = test_df['D']
X_Test = test_df.loc[:, ['A','B','C']]