xxxxxxxxxx
## sklearn train/test split
from sklearn.model_selection import train_test_split

# Option 1: separate the features from the target, then split both together.
X = df.drop(columns=['target']).values  # independent features: every column except 'target'
y = df['target'].values  # dependent variable
# test_size=0.25 holds out 25% of the rows for testing; the fixed
# random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Option 2: randomly split the whole DataFrame (target column included)
# instead of separating out a target variable first.
# frac=0.8 samples 80% of the rows for training; random_state=25 makes the
# sample reproducible.
training_data = df.sample(frac=0.8, random_state=25)
# The rows that were not sampled form the remaining ~20% used for testing.
# NOTE: drop() removes rows by index label, so this relies on df having
# unique index labels.
testing_data = df.drop(training_data.index)
xxxxxxxxxx
from sklearn.model_selection import train_test_split

# Feature matrix: every column except 'target', as a NumPy array.
X = df.drop(columns=['target']).values  # independent features
# Target vector.
y = df['target'].values  # dependent variable

# Hold out 25% of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
xxxxxxxxxx
from sklearn.model_selection import train_test_split

# 75% of the samples go to training and 25% to testing; random_state=42
# fixes the shuffle so repeated runs give the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
xxxxxxxxxx
import numpy as np
from sklearn.model_selection import train_test_split

# Reserve 33% of the samples for testing, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
xxxxxxxxxx
# Imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset and take a first look at it.
df = pd.read_csv('prediction.csv')
print(df.shape)
print(df.head())
print(df.columns)

# Feature column and target column.
X = df['area']
y = df['prices']

# Shuffle the rows and keep 25% of them for testing (seeded for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=104,
)

# Report the size of each partition.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Alternative approach: split the DataFrame itself, without train_test_split.
split_ratio = 0.8  # 80% for training, 20% for testing

# sample(frac=1) returns every row, in a random (seeded) order.
df_shuffled = df.sample(frac=1, random_state=42)

# Position of the first row that belongs to the test set.
split_index = int(len(df_shuffled) * split_ratio)

# Rows before the split index are for training; the remainder is for testing.
train_df, test_df = df_shuffled.iloc[:split_index], df_shuffled.iloc[split_index:]
xxxxxxxxxx
# Split the dataset into training and testing subsets.
from sklearn.model_selection import train_test_split

# 30% of the samples are held out for testing; random_state=0 fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)

# Print the shape of every subset.
print("x_train ", x_train.shape)
print("x_test ", x_test.shape)
print("y_train ", y_train.shape)
print("y_test ", y_test.shape)
xxxxxxxxxx
from sklearn.model_selection import train_test_split

# Features: every column except "target". Target: the "target" column.
X = df.drop(columns="target")
y = df["target"]

# Keep 33% of the rows for testing; the seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
xxxxxxxxxx
from sklearn.model_selection import train_test_split

# NOTE(review): the features are passed as lowercase `x` here, while the
# outputs use an uppercase `X_` prefix; confirm `x` is the name the
# surrounding code actually defines.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

# Shapes of the two feature partitions, then the two target partitions.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
xxxxxxxxxx
import sklearn.model_selection as model_selection

# Split X and y into 65% training / 35% testing data; random_state fixes the
# shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, train_size=0.65, test_size=0.35, random_state=101
)

# Show the resulting partitions.
print("X_train: ", X_train)
print("y_train: ", y_train)
print("X_test: ", X_test)
print("y_test: ", y_test)
xxxxxxxxxx
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing. No random_state is passed, so the
# shuffle is not seeded and the split can differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
)
xxxxxxxxxx
from sklearn.model_selection import train_test_split

# Toy data: the integers 0..9 arranged as 5 samples with 2 features each,
# plus one label (0..4) per sample. `np` is the NumPy alias imported earlier
# in this file.
X = np.arange(10).reshape(5, 2)
y = range(5)

# Hold out 33% of the samples for testing, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)